From d751c169e9a6f0f853346f1184881422bd10b3c2 Mon Sep 17 00:00:00 2001
From: Michael Davidson <md@google.com>
Date: Thu, 10 Oct 2013 18:39:54 -0700
Subject: x86, relocs: Add more per-cpu gold special cases

The "gold" linker doesn't seem to put some additional per-cpu cases in
the right place. Add these to the per-cpu check. Without this, the kASLR
patch series fails to correctly apply relocations, and fails to boot.

Signed-off-by: Michael Davidson <md@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: http://lkml.kernel.org/r/20131011013954.GA28902@www.outflux.net
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/tools/relocs.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index f7bab68a4b83..71a2533c90d3 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -722,15 +722,23 @@ static void percpu_init(void)
 
 /*
  * Check to see if a symbol lies in the .data..percpu section.
- * For some as yet not understood reason the "__init_begin"
- * symbol which immediately preceeds the .data..percpu section
- * also shows up as it it were part of it so we do an explict
- * check for that symbol name and ignore it.
+ *
+ * The linker incorrectly associates some symbols with the
+ * .data..percpu section so we also need to check the symbol
+ * name to make sure that we classify the symbol correctly.
+ *
+ * The GNU linker incorrectly associates:
+ *	__init_begin
+ *
+ * The "gold" linker incorrectly associates:
+ *	init_per_cpu__irq_stack_union
+ *	init_per_cpu__gdt_page
  */
 static int is_percpu_sym(ElfW(Sym) *sym, const char *symname)
 {
 	return (sym->st_shndx == per_cpu_shndx) &&
-		strcmp(symname, "__init_begin");
+		strcmp(symname, "__init_begin") &&
+		strncmp(symname, "init_per_cpu_", 13);
 }
 
 
-- 
cgit v1.2.3


From dd78b97367bd575918204cc89107c1479d3fc1a7 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 10 Oct 2013 17:18:13 -0700
Subject: x86, boot: Move CPU flags out of cpucheck

Refactor the CPU flags handling out of the cpucheck routines so that
they can be reused by the future ASLR routines (in order to detect CPU
features like RDRAND and RDTSC).

This reworks has_eflag() and has_fpu() to be used on both 32-bit and
64-bit, and refactors the calls to cpuid to make them PIC-safe on 32-bit.

Signed-off-by: Kees Cook <keescook@chromium.org>
Link: http://lkml.kernel.org/r/1381450698-28710-2-git-send-email-keescook@chromium.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/boot/Makefile              |   2 +-
 arch/x86/boot/boot.h                |  10 +---
 arch/x86/boot/compressed/Makefile   |   2 +-
 arch/x86/boot/compressed/cpuflags.c |  12 +++++
 arch/x86/boot/cpucheck.c            |  86 -----------------------------
 arch/x86/boot/cpuflags.c            | 104 ++++++++++++++++++++++++++++++++++++
 arch/x86/boot/cpuflags.h            |  19 +++++++
 7 files changed, 138 insertions(+), 97 deletions(-)
 create mode 100644 arch/x86/boot/compressed/cpuflags.c
 create mode 100644 arch/x86/boot/cpuflags.c
 create mode 100644 arch/x86/boot/cpuflags.h

(limited to 'arch')

diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 379814bc41e3..0da2e37b37c3 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -20,7 +20,7 @@ targets		:= vmlinux.bin setup.bin setup.elf bzImage
 targets		+= fdimage fdimage144 fdimage288 image.iso mtools.conf
 subdir-		:= compressed
 
-setup-y		+= a20.o bioscall.o cmdline.o copy.o cpu.o cpucheck.o
+setup-y		+= a20.o bioscall.o cmdline.o copy.o cpu.o cpuflags.o cpucheck.o
 setup-y		+= early_serial_console.o edd.o header.o main.o mca.o memory.o
 setup-y		+= pm.o pmjump.o printf.o regs.o string.o tty.o video.o
 setup-y		+= video-mode.o version.o
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index ef72baeff484..50f8c5e0f37e 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -26,9 +26,8 @@
 #include <asm/boot.h>
 #include <asm/setup.h>
 #include "bitops.h"
-#include <asm/cpufeature.h>
-#include <asm/processor-flags.h>
 #include "ctype.h"
+#include "cpuflags.h"
 
 /* Useful macros */
 #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
@@ -307,14 +306,7 @@ static inline int cmdline_find_option_bool(const char *option)
 	return __cmdline_find_option_bool(cmd_line_ptr, option);
 }
 
-
 /* cpu.c, cpucheck.c */
-struct cpu_features {
-	int level;		/* Family, or 64 for x86-64 */
-	int model;
-	u32 flags[NCAPINTS];
-};
-extern struct cpu_features cpu;
 int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr);
 int validate_cpu(void);
 
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index dcd90df10ab4..3312f1be9fd9 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -27,7 +27,7 @@ HOST_EXTRACFLAGS += -I$(srctree)/tools/include
 
 VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
 	$(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \
-	$(obj)/piggy.o
+	$(obj)/piggy.o $(obj)/cpuflags.o
 
 $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
 
diff --git a/arch/x86/boot/compressed/cpuflags.c b/arch/x86/boot/compressed/cpuflags.c
new file mode 100644
index 000000000000..931cba6a4bb0
--- /dev/null
+++ b/arch/x86/boot/compressed/cpuflags.c
@@ -0,0 +1,12 @@
+#ifdef CONFIG_RANDOMIZE_BASE
+
+#include "../cpuflags.c"
+
+bool has_cpuflag(int flag)
+{
+	get_flags();
+
+	return test_bit(flag, cpu.flags);
+}
+
+#endif
diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c
index 4d3ff037201f..e1f3c166a512 100644
--- a/arch/x86/boot/cpucheck.c
+++ b/arch/x86/boot/cpucheck.c
@@ -28,8 +28,6 @@
 #include <asm/required-features.h>
 #include <asm/msr-index.h>
 
-struct cpu_features cpu;
-static u32 cpu_vendor[3];
 static u32 err_flags[NCAPINTS];
 
 static const int req_level = CONFIG_X86_MINIMUM_CPU_FAMILY;
@@ -69,90 +67,6 @@ static int is_transmeta(void)
 	       cpu_vendor[2] == A32('M', 'x', '8', '6');
 }
 
-static int has_fpu(void)
-{
-	u16 fcw = -1, fsw = -1;
-	u32 cr0;
-
-	asm("movl %%cr0,%0" : "=r" (cr0));
-	if (cr0 & (X86_CR0_EM|X86_CR0_TS)) {
-		cr0 &= ~(X86_CR0_EM|X86_CR0_TS);
-		asm volatile("movl %0,%%cr0" : : "r" (cr0));
-	}
-
-	asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
-		     : "+m" (fsw), "+m" (fcw));
-
-	return fsw == 0 && (fcw & 0x103f) == 0x003f;
-}
-
-static int has_eflag(u32 mask)
-{
-	u32 f0, f1;
-
-	asm("pushfl ; "
-	    "pushfl ; "
-	    "popl %0 ; "
-	    "movl %0,%1 ; "
-	    "xorl %2,%1 ; "
-	    "pushl %1 ; "
-	    "popfl ; "
-	    "pushfl ; "
-	    "popl %1 ; "
-	    "popfl"
-	    : "=&r" (f0), "=&r" (f1)
-	    : "ri" (mask));
-
-	return !!((f0^f1) & mask);
-}
-
-static void get_flags(void)
-{
-	u32 max_intel_level, max_amd_level;
-	u32 tfms;
-
-	if (has_fpu())
-		set_bit(X86_FEATURE_FPU, cpu.flags);
-
-	if (has_eflag(X86_EFLAGS_ID)) {
-		asm("cpuid"
-		    : "=a" (max_intel_level),
-		      "=b" (cpu_vendor[0]),
-		      "=d" (cpu_vendor[1]),
-		      "=c" (cpu_vendor[2])
-		    : "a" (0));
-
-		if (max_intel_level >= 0x00000001 &&
-		    max_intel_level <= 0x0000ffff) {
-			asm("cpuid"
-			    : "=a" (tfms),
-			      "=c" (cpu.flags[4]),
-			      "=d" (cpu.flags[0])
-			    : "a" (0x00000001)
-			    : "ebx");
-			cpu.level = (tfms >> 8) & 15;
-			cpu.model = (tfms >> 4) & 15;
-			if (cpu.level >= 6)
-				cpu.model += ((tfms >> 16) & 0xf) << 4;
-		}
-
-		asm("cpuid"
-		    : "=a" (max_amd_level)
-		    : "a" (0x80000000)
-		    : "ebx", "ecx", "edx");
-
-		if (max_amd_level >= 0x80000001 &&
-		    max_amd_level <= 0x8000ffff) {
-			u32 eax = 0x80000001;
-			asm("cpuid"
-			    : "+a" (eax),
-			      "=c" (cpu.flags[6]),
-			      "=d" (cpu.flags[1])
-			    : : "ebx");
-		}
-	}
-}
-
 /* Returns a bitmask of which words we have error bits in */
 static int check_flags(void)
 {
diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c
new file mode 100644
index 000000000000..b02544a2bce0
--- /dev/null
+++ b/arch/x86/boot/cpuflags.c
@@ -0,0 +1,104 @@
+#include <linux/types.h>
+#include "bitops.h"
+
+#include <asm/processor-flags.h>
+#include <asm/required-features.h>
+#include <asm/msr-index.h>
+#include "cpuflags.h"
+
+struct cpu_features cpu;
+u32 cpu_vendor[3];
+
+static bool loaded_flags;
+
+static int has_fpu(void)
+{
+	u16 fcw = -1, fsw = -1;
+	unsigned long cr0;
+
+	asm volatile("mov %%cr0,%0" : "=r" (cr0));
+	if (cr0 & (X86_CR0_EM|X86_CR0_TS)) {
+		cr0 &= ~(X86_CR0_EM|X86_CR0_TS);
+		asm volatile("mov %0,%%cr0" : : "r" (cr0));
+	}
+
+	asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
+		     : "+m" (fsw), "+m" (fcw));
+
+	return fsw == 0 && (fcw & 0x103f) == 0x003f;
+}
+
+int has_eflag(unsigned long mask)
+{
+	unsigned long f0, f1;
+
+	asm volatile("pushf	\n\t"
+		     "pushf	\n\t"
+		     "pop %0	\n\t"
+		     "mov %0,%1	\n\t"
+		     "xor %2,%1	\n\t"
+		     "push %1	\n\t"
+		     "popf	\n\t"
+		     "pushf	\n\t"
+		     "pop %1	\n\t"
+		     "popf"
+		     : "=&r" (f0), "=&r" (f1)
+		     : "ri" (mask));
+
+	return !!((f0^f1) & mask);
+}
+
+/* Handle x86_32 PIC using ebx. */
+#if defined(__i386__) && defined(__PIC__)
+# define EBX_REG "=r"
+#else
+# define EBX_REG "=b"
+#endif
+
+static inline void cpuid(u32 id, u32 *a, u32 *b, u32 *c, u32 *d)
+{
+	asm volatile(".ifnc %%ebx,%3 ; movl  %%ebx,%3 ; .endif	\n\t"
+		     "cpuid					\n\t"
+		     ".ifnc %%ebx,%3 ; xchgl %%ebx,%3 ; .endif	\n\t"
+		    : "=a" (*a), "=c" (*c), "=d" (*d), EBX_REG (*b)
+		    : "a" (id)
+	);
+}
+
+void get_flags(void)
+{
+	u32 max_intel_level, max_amd_level;
+	u32 tfms;
+	u32 ignored;
+
+	if (loaded_flags)
+		return;
+	loaded_flags = true;
+
+	if (has_fpu())
+		set_bit(X86_FEATURE_FPU, cpu.flags);
+
+	if (has_eflag(X86_EFLAGS_ID)) {
+		cpuid(0x0, &max_intel_level, &cpu_vendor[0], &cpu_vendor[2],
+		      &cpu_vendor[1]);
+
+		if (max_intel_level >= 0x00000001 &&
+		    max_intel_level <= 0x0000ffff) {
+			cpuid(0x1, &tfms, &ignored, &cpu.flags[4],
+			      &cpu.flags[0]);
+			cpu.level = (tfms >> 8) & 15;
+			cpu.model = (tfms >> 4) & 15;
+			if (cpu.level >= 6)
+				cpu.model += ((tfms >> 16) & 0xf) << 4;
+		}
+
+		cpuid(0x80000000, &max_amd_level, &ignored, &ignored,
+		      &ignored);
+
+		if (max_amd_level >= 0x80000001 &&
+		    max_amd_level <= 0x8000ffff) {
+			cpuid(0x80000001, &ignored, &ignored, &cpu.flags[6],
+			      &cpu.flags[1]);
+		}
+	}
+}
diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h
new file mode 100644
index 000000000000..9bb4e25f7317
--- /dev/null
+++ b/arch/x86/boot/cpuflags.h
@@ -0,0 +1,19 @@
+#ifndef BOOT_CPUFLAGS_H
+#define BOOT_CPUFLAGS_H
+
+#include <asm/cpufeature.h>
+#include <asm/processor-flags.h>
+
+struct cpu_features {
+	int level;		/* Family, or 64 for x86-64 */
+	int model;
+	u32 flags[NCAPINTS];
+};
+
+extern struct cpu_features cpu;
+extern u32 cpu_vendor[3];
+
+int has_eflag(unsigned long mask);
+void get_flags(void);
+
+#endif
-- 
cgit v1.2.3


From 8ab3820fd5b2896d66da7bb2a906bc382e63e7bc Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 10 Oct 2013 17:18:14 -0700
Subject: x86, kaslr: Return location from decompress_kernel

This allows decompress_kernel to return a new location for the kernel to
be relocated to. Additionally, enforces CONFIG_PHYSICAL_START as the
minimum relocation position when building with CONFIG_RELOCATABLE.

With CONFIG_RANDOMIZE_BASE set, the choose_kernel_location routine
will select a new location to decompress the kernel, though here it is
presently a no-op. The kernel command line option "nokaslr" is introduced
to bypass these routines.

Signed-off-by: Kees Cook <keescook@chromium.org>
Link: http://lkml.kernel.org/r/1381450698-28710-3-git-send-email-keescook@chromium.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 Documentation/kernel-parameters.txt |  4 ++++
 arch/x86/Kconfig                    | 38 +++++++++++++++++++++++++++++++++----
 arch/x86/boot/compressed/Makefile   |  2 +-
 arch/x86/boot/compressed/aslr.c     | 23 ++++++++++++++++++++++
 arch/x86/boot/compressed/cmdline.c  |  2 +-
 arch/x86/boot/compressed/head_32.S  | 10 ++++++----
 arch/x86/boot/compressed/head_64.S  | 16 ++++++++++------
 arch/x86/boot/compressed/misc.c     |  8 ++++++--
 arch/x86/boot/compressed/misc.h     | 27 ++++++++++++++++++++------
 9 files changed, 106 insertions(+), 24 deletions(-)
 create mode 100644 arch/x86/boot/compressed/aslr.c

(limited to 'arch')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index fcbb736d55fe..773fc4c077b4 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1975,6 +1975,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	noapic		[SMP,APIC] Tells the kernel to not make use of any
 			IOAPICs that may be present in the system.
 
+	nokaslr		[X86]
+			Disable kernel base offset ASLR (Address Space
+			Layout Randomization) if built into the kernel.
+
 	noautogroup	Disable scheduler automatic task group creation.
 
 	nobats		[PPC] Do not use BATs for mapping kernel lowmem
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ee2fb9d37745..992701d4d4f8 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1722,16 +1722,46 @@ config RELOCATABLE
 
 	  Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address
 	  it has been loaded at and the compile time physical address
-	  (CONFIG_PHYSICAL_START) is ignored.
+	  (CONFIG_PHYSICAL_START) is used as the minimum location.
 
-# Relocation on x86-32 needs some additional build support
+config RANDOMIZE_BASE
+	bool "Randomize the address of the kernel image"
+	depends on RELOCATABLE
+	depends on !HIBERNATION
+	default n
+	---help---
+	   Randomizes the physical and virtual address at which the
+	   kernel image is decompressed, as a security feature that
+	   deters exploit attempts relying on knowledge of the location
+	   of kernel internals.
+
+	   Entropy is generated using the RDRAND instruction if it
+	   is supported.  If not, then RDTSC is used, if supported. If
+	   neither RDRAND nor RDTSC are supported, then no randomness
+	   is introduced.
+
+	   The kernel will be offset by up to RANDOMIZE_BASE_MAX_OFFSET,
+	   and aligned according to PHYSICAL_ALIGN.
+
+config RANDOMIZE_BASE_MAX_OFFSET
+	hex "Maximum ASLR offset allowed"
+	depends on RANDOMIZE_BASE
+	default "0x10000000"
+	range 0x0 0x10000000
+	---help---
+	 Determines the maximal offset in bytes that will be applied to the
+	 kernel when Address Space Layout Randomization (ASLR) is active.
+	 Must be less than or equal to the actual physical memory on the
+	 system. This must be a power of two.
+
+# Relocation on x86 needs some additional build support
 config X86_NEED_RELOCS
 	def_bool y
-	depends on X86_32 && RELOCATABLE
+	depends on RANDOMIZE_BASE || (X86_32 && RELOCATABLE)
 
 config PHYSICAL_ALIGN
 	hex "Alignment value to which kernel should be aligned"
-	default "0x1000000"
+	default "0x200000"
 	range 0x2000 0x1000000 if X86_32
 	range 0x200000 0x1000000 if X86_64
 	---help---
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 3312f1be9fd9..ae8b5dbbd8c5 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -27,7 +27,7 @@ HOST_EXTRACFLAGS += -I$(srctree)/tools/include
 
 VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
 	$(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \
-	$(obj)/piggy.o $(obj)/cpuflags.o
+	$(obj)/piggy.o $(obj)/cpuflags.o $(obj)/aslr.o
 
 $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
 
diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
new file mode 100644
index 000000000000..b73cc66d201e
--- /dev/null
+++ b/arch/x86/boot/compressed/aslr.c
@@ -0,0 +1,23 @@
+#include "misc.h"
+
+#ifdef CONFIG_RANDOMIZE_BASE
+
+unsigned char *choose_kernel_location(unsigned char *input,
+				      unsigned long input_size,
+				      unsigned char *output,
+				      unsigned long output_size)
+{
+	unsigned long choice = (unsigned long)output;
+
+	if (cmdline_find_option_bool("nokaslr")) {
+		debug_putstr("KASLR disabled...\n");
+		goto out;
+	}
+
+	/* XXX: choose random location. */
+
+out:
+	return (unsigned char *)choice;
+}
+
+#endif /* CONFIG_RANDOMIZE_BASE */
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
index bffd73b45b1f..b68e3033e6b9 100644
--- a/arch/x86/boot/compressed/cmdline.c
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -1,6 +1,6 @@
 #include "misc.h"
 
-#ifdef CONFIG_EARLY_PRINTK
+#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE
 
 static unsigned long fs;
 static inline void set_fs(unsigned long seg)
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 5d6f6891b188..9116aac232c7 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -117,9 +117,11 @@ preferred_addr:
 	addl    %eax, %ebx
 	notl	%eax
 	andl    %eax, %ebx
-#else
-	movl	$LOAD_PHYSICAL_ADDR, %ebx
+	cmpl	$LOAD_PHYSICAL_ADDR, %ebx
+	jge	1f
 #endif
+	movl	$LOAD_PHYSICAL_ADDR, %ebx
+1:
 
 	/* Target address to relocate to for decompression */
 	addl	$z_extract_offset, %ebx
@@ -191,14 +193,14 @@ relocated:
 	leal	boot_heap(%ebx), %eax
 	pushl	%eax		/* heap area */
 	pushl	%esi		/* real mode pointer */
-	call	decompress_kernel
+	call	decompress_kernel /* returns kernel location in %eax */
 	addl	$24, %esp
 
 /*
  * Jump to the decompressed kernel.
  */
 	xorl	%ebx, %ebx
-	jmp	*%ebp
+	jmp	*%eax
 
 /*
  * Stack and heap for uncompression
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index c337422b575d..c5c1ae0997e7 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -94,9 +94,11 @@ ENTRY(startup_32)
 	addl	%eax, %ebx
 	notl	%eax
 	andl	%eax, %ebx
-#else
-	movl	$LOAD_PHYSICAL_ADDR, %ebx
+	cmpl	$LOAD_PHYSICAL_ADDR, %ebx
+	jge	1f
 #endif
+	movl	$LOAD_PHYSICAL_ADDR, %ebx
+1:
 
 	/* Target address to relocate to for decompression */
 	addl	$z_extract_offset, %ebx
@@ -269,9 +271,11 @@ preferred_addr:
 	addq	%rax, %rbp
 	notq	%rax
 	andq	%rax, %rbp
-#else
-	movq	$LOAD_PHYSICAL_ADDR, %rbp
+	cmpq	$LOAD_PHYSICAL_ADDR, %rbp
+	jge	1f
 #endif
+	movq	$LOAD_PHYSICAL_ADDR, %rbp
+1:
 
 	/* Target address to relocate to for decompression */
 	leaq	z_extract_offset(%rbp), %rbx
@@ -339,13 +343,13 @@ relocated:
 	movl	$z_input_len, %ecx	/* input_len */
 	movq	%rbp, %r8		/* output target address */
 	movq	$z_output_len, %r9	/* decompressed length */
-	call	decompress_kernel
+	call	decompress_kernel	/* returns kernel location in %rax */
 	popq	%rsi
 
 /*
  * Jump to the decompressed kernel.
  */
-	jmp	*%rbp
+	jmp	*%rax
 
 	.code32
 no_longmode:
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 434f077d2c4d..71387685dc16 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -395,7 +395,7 @@ static void parse_elf(void *output)
 	free(phdrs);
 }
 
-asmlinkage void decompress_kernel(void *rmode, memptr heap,
+asmlinkage void *decompress_kernel(void *rmode, memptr heap,
 				  unsigned char *input_data,
 				  unsigned long input_len,
 				  unsigned char *output,
@@ -422,6 +422,10 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
 	free_mem_ptr     = heap;	/* Heap */
 	free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
 
+	output = choose_kernel_location(input_data, input_len,
+					output, output_len);
+
+	/* Validate memory location choices. */
 	if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))
 		error("Destination address inappropriately aligned");
 #ifdef CONFIG_X86_64
@@ -441,5 +445,5 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
 	parse_elf(output);
 	handle_relocations(output, output_len);
 	debug_putstr("done.\nBooting the kernel.\n");
-	return;
+	return output;
 }
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 674019d8e235..9077af7fd0b8 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -39,23 +39,38 @@ static inline void debug_putstr(const char *s)
 
 #endif
 
-#ifdef CONFIG_EARLY_PRINTK
-
+#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE
 /* cmdline.c */
 int cmdline_find_option(const char *option, char *buffer, int bufsize);
 int cmdline_find_option_bool(const char *option);
+#endif
 
-/* early_serial_console.c */
-extern int early_serial_base;
-void console_init(void);
 
+#if CONFIG_RANDOMIZE_BASE
+/* aslr.c */
+unsigned char *choose_kernel_location(unsigned char *input,
+				      unsigned long input_size,
+				      unsigned char *output,
+				      unsigned long output_size);
 #else
+static inline
+unsigned char *choose_kernel_location(unsigned char *input,
+				      unsigned long input_size,
+				      unsigned char *output,
+				      unsigned long output_size)
+{
+	return output;
+}
+#endif
 
+#ifdef CONFIG_EARLY_PRINTK
 /* early_serial_console.c */
+extern int early_serial_base;
+void console_init(void);
+#else
 static const int early_serial_base;
 static inline void console_init(void)
 { }
-
 #endif
 
 #endif
-- 
cgit v1.2.3


From 5bfce5ef55cbe78ee2ee6e97f2e26a8a582008f3 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 10 Oct 2013 17:18:15 -0700
Subject: x86, kaslr: Provide randomness functions

Adds potential sources of randomness: RDRAND, RDTSC, or the i8254.

This moves the pre-alternatives inline rdrand function into the header so
both pieces of code can use it. Availability of RDRAND is then controlled
by CONFIG_ARCH_RANDOM, if someone wants to disable it even for kASLR.

Signed-off-by: Kees Cook <keescook@chromium.org>
Link: http://lkml.kernel.org/r/1381450698-28710-4-git-send-email-keescook@chromium.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/boot/compressed/aslr.c   | 53 +++++++++++++++++++++++++++++++++++++++
 arch/x86/boot/compressed/misc.h   |  2 ++
 arch/x86/include/asm/archrandom.h | 21 ++++++++++++++++
 arch/x86/kernel/cpu/rdrand.c      | 14 -----------
 4 files changed, 76 insertions(+), 14 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index b73cc66d201e..14b24e0e5496 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -1,6 +1,59 @@
 #include "misc.h"
 
 #ifdef CONFIG_RANDOMIZE_BASE
+#include <asm/msr.h>
+#include <asm/archrandom.h>
+
+#define I8254_PORT_CONTROL	0x43
+#define I8254_PORT_COUNTER0	0x40
+#define I8254_CMD_READBACK	0xC0
+#define I8254_SELECT_COUNTER0	0x02
+#define I8254_STATUS_NOTREADY	0x40
+static inline u16 i8254(void)
+{
+	u16 status, timer;
+
+	do {
+		outb(I8254_PORT_CONTROL,
+		     I8254_CMD_READBACK | I8254_SELECT_COUNTER0);
+		status = inb(I8254_PORT_COUNTER0);
+		timer  = inb(I8254_PORT_COUNTER0);
+		timer |= inb(I8254_PORT_COUNTER0) << 8;
+	} while (status & I8254_STATUS_NOTREADY);
+
+	return timer;
+}
+
+static unsigned long get_random_long(void)
+{
+	unsigned long random;
+
+	if (has_cpuflag(X86_FEATURE_RDRAND)) {
+		debug_putstr("KASLR using RDRAND...\n");
+		if (rdrand_long(&random))
+			return random;
+	}
+
+	if (has_cpuflag(X86_FEATURE_TSC)) {
+		uint32_t raw;
+
+		debug_putstr("KASLR using RDTSC...\n");
+		rdtscl(raw);
+
+		/* Only use the low bits of rdtsc. */
+		random = raw & 0xffff;
+	} else {
+		debug_putstr("KASLR using i8254...\n");
+		random = i8254();
+	}
+
+	/* Extend timer bits poorly... */
+	random |= (random << 16);
+#ifdef CONFIG_X86_64
+	random |= (random << 32);
+#endif
+	return random;
+}
 
 unsigned char *choose_kernel_location(unsigned char *input,
 				      unsigned long input_size,
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 9077af7fd0b8..0782eb0b6e30 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -52,6 +52,8 @@ unsigned char *choose_kernel_location(unsigned char *input,
 				      unsigned long input_size,
 				      unsigned char *output,
 				      unsigned long output_size);
+/* cpuflags.c */
+bool has_cpuflag(int flag);
 #else
 static inline
 unsigned char *choose_kernel_location(unsigned char *input,
diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h
index 0d9ec770f2f8..e6a92455740e 100644
--- a/arch/x86/include/asm/archrandom.h
+++ b/arch/x86/include/asm/archrandom.h
@@ -39,6 +39,20 @@
 
 #ifdef CONFIG_ARCH_RANDOM
 
+/* Instead of arch_get_random_long() when alternatives haven't run. */
+static inline int rdrand_long(unsigned long *v)
+{
+	int ok;
+	asm volatile("1: " RDRAND_LONG "\n\t"
+		     "jc 2f\n\t"
+		     "decl %0\n\t"
+		     "jnz 1b\n\t"
+		     "2:"
+		     : "=r" (ok), "=a" (*v)
+		     : "0" (RDRAND_RETRY_LOOPS));
+	return ok;
+}
+
 #define GET_RANDOM(name, type, rdrand, nop)			\
 static inline int name(type *v)					\
 {								\
@@ -68,6 +82,13 @@ GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3);
 
 #endif /* CONFIG_X86_64 */
 
+#else
+
+static inline int rdrand_long(unsigned long *v)
+{
+	return 0;
+}
+
 #endif  /* CONFIG_ARCH_RANDOM */
 
 extern void x86_init_rdrand(struct cpuinfo_x86 *c);
diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c
index 88db010845cb..384df5105fbc 100644
--- a/arch/x86/kernel/cpu/rdrand.c
+++ b/arch/x86/kernel/cpu/rdrand.c
@@ -31,20 +31,6 @@ static int __init x86_rdrand_setup(char *s)
 }
 __setup("nordrand", x86_rdrand_setup);
 
-/* We can't use arch_get_random_long() here since alternatives haven't run */
-static inline int rdrand_long(unsigned long *v)
-{
-	int ok;
-	asm volatile("1: " RDRAND_LONG "\n\t"
-		     "jc 2f\n\t"
-		     "decl %0\n\t"
-		     "jnz 1b\n\t"
-		     "2:"
-		     : "=r" (ok), "=a" (*v)
-		     : "0" (RDRAND_RETRY_LOOPS));
-	return ok;
-}
-
 /*
  * Force a reseed cycle; we are architecturally guaranteed a reseed
  * after no more than 512 128-bit chunks of random data.  This also
-- 
cgit v1.2.3


From 82fa9637a2ba285bcc7c5050c73010b2c1b3d803 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 10 Oct 2013 17:18:16 -0700
Subject: x86, kaslr: Select random position from e820 maps

Counts available alignment positions across all e820 maps, and chooses
one randomly for the new kernel base address, making sure not to collide
with unsafe memory areas.

Signed-off-by: Kees Cook <keescook@chromium.org>
Link: http://lkml.kernel.org/r/1381450698-28710-5-git-send-email-keescook@chromium.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/boot/compressed/aslr.c | 193 +++++++++++++++++++++++++++++++++++++++-
 arch/x86/boot/compressed/misc.c |  10 +--
 arch/x86/boot/compressed/misc.h |   8 ++
 3 files changed, 202 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index 14b24e0e5496..05957986d123 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -3,6 +3,7 @@
 #ifdef CONFIG_RANDOMIZE_BASE
 #include <asm/msr.h>
 #include <asm/archrandom.h>
+#include <asm/e820.h>
 
 #define I8254_PORT_CONTROL	0x43
 #define I8254_PORT_COUNTER0	0x40
@@ -55,20 +56,210 @@ static unsigned long get_random_long(void)
 	return random;
 }
 
+struct mem_vector {
+	unsigned long start;
+	unsigned long size;
+};
+
+#define MEM_AVOID_MAX 5
+struct mem_vector mem_avoid[MEM_AVOID_MAX];
+
+static bool mem_contains(struct mem_vector *region, struct mem_vector *item)
+{
+	/* Item at least partially before region. */
+	if (item->start < region->start)
+		return false;
+	/* Item at least partially after region. */
+	if (item->start + item->size > region->start + region->size)
+		return false;
+	return true;
+}
+
+static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
+{
+	/* Item one is entirely before item two. */
+	if (one->start + one->size <= two->start)
+		return false;
+	/* Item one is entirely after item two. */
+	if (one->start >= two->start + two->size)
+		return false;
+	return true;
+}
+
+static void mem_avoid_init(unsigned long input, unsigned long input_size,
+			   unsigned long output, unsigned long output_size)
+{
+	u64 initrd_start, initrd_size;
+	u64 cmd_line, cmd_line_size;
+	unsigned long unsafe, unsafe_len;
+	char *ptr;
+
+	/*
+	 * Avoid the region that is unsafe to overlap during
+	 * decompression (see calculations at top of misc.c).
+	 */
+	unsafe_len = (output_size >> 12) + 32768 + 18;
+	unsafe = (unsigned long)input + input_size - unsafe_len;
+	mem_avoid[0].start = unsafe;
+	mem_avoid[0].size = unsafe_len;
+
+	/* Avoid initrd. */
+	initrd_start  = (u64)real_mode->ext_ramdisk_image << 32;
+	initrd_start |= real_mode->hdr.ramdisk_image;
+	initrd_size  = (u64)real_mode->ext_ramdisk_size << 32;
+	initrd_size |= real_mode->hdr.ramdisk_size;
+	mem_avoid[1].start = initrd_start;
+	mem_avoid[1].size = initrd_size;
+
+	/* Avoid kernel command line. */
+	cmd_line  = (u64)real_mode->ext_cmd_line_ptr << 32;
+	cmd_line |= real_mode->hdr.cmd_line_ptr;
+	/* Calculate size of cmd_line. */
+	ptr = (char *)(unsigned long)cmd_line;
+	for (cmd_line_size = 0; ptr[cmd_line_size++]; )
+		;
+	mem_avoid[2].start = cmd_line;
+	mem_avoid[2].size = cmd_line_size;
+
+	/* Avoid heap memory. */
+	mem_avoid[3].start = (unsigned long)free_mem_ptr;
+	mem_avoid[3].size = BOOT_HEAP_SIZE;
+
+	/* Avoid stack memory. */
+	mem_avoid[4].start = (unsigned long)free_mem_end_ptr;
+	mem_avoid[4].size = BOOT_STACK_SIZE;
+}
+
+/* Does this memory vector overlap a known avoided area? */
+bool mem_avoid_overlap(struct mem_vector *img)
+{
+	int i;
+
+	for (i = 0; i < MEM_AVOID_MAX; i++) {
+		if (mem_overlaps(img, &mem_avoid[i]))
+			return true;
+	}
+
+	return false;
+}
+
+unsigned long slots[CONFIG_RANDOMIZE_BASE_MAX_OFFSET / CONFIG_PHYSICAL_ALIGN];
+unsigned long slot_max = 0;
+
+static void slots_append(unsigned long addr)
+{
+	/* Overflowing the slots list should be impossible. */
+	if (slot_max >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET /
+			CONFIG_PHYSICAL_ALIGN)
+		return;
+
+	slots[slot_max++] = addr;
+}
+
+static unsigned long slots_fetch_random(void)
+{
+	/* Handle case of no slots stored. */
+	if (slot_max == 0)
+		return 0;
+
+	return slots[get_random_long() % slot_max];
+}
+
+static void process_e820_entry(struct e820entry *entry,
+			       unsigned long minimum,
+			       unsigned long image_size)
+{
+	struct mem_vector region, img;
+
+	/* Skip non-RAM entries. */
+	if (entry->type != E820_RAM)
+		return;
+
+	/* Ignore entries entirely above our maximum. */
+	if (entry->addr >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET)
+		return;
+
+	/* Ignore entries entirely below our minimum. */
+	if (entry->addr + entry->size < minimum)
+		return;
+
+	region.start = entry->addr;
+	region.size = entry->size;
+
+	/* Potentially raise address to minimum location. */
+	if (region.start < minimum)
+		region.start = minimum;
+
+	/* Potentially raise address to meet alignment requirements. */
+	region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);
+
+	/* Did we raise the address above the bounds of this e820 region? */
+	if (region.start > entry->addr + entry->size)
+		return;
+
+	/* Reduce size by any delta from the original address. */
+	region.size -= region.start - entry->addr;
+
+	/* Reduce maximum size to fit end of image within maximum limit. */
+	if (region.start + region.size > CONFIG_RANDOMIZE_BASE_MAX_OFFSET)
+		region.size = CONFIG_RANDOMIZE_BASE_MAX_OFFSET - region.start;
+
+	/* Walk each aligned slot and check for avoided areas. */
+	for (img.start = region.start, img.size = image_size ;
+	     mem_contains(&region, &img) ;
+	     img.start += CONFIG_PHYSICAL_ALIGN) {
+		if (mem_avoid_overlap(&img))
+			continue;
+		slots_append(img.start);
+	}
+}
+
+static unsigned long find_random_addr(unsigned long minimum,
+				      unsigned long size)
+{
+	int i;
+	unsigned long addr;
+
+	/* Make sure minimum is aligned. */
+	minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
+
+	/* Verify potential e820 positions, appending to slots list. */
+	for (i = 0; i < real_mode->e820_entries; i++) {
+		process_e820_entry(&real_mode->e820_map[i], minimum, size);
+	}
+
+	return slots_fetch_random();
+}
+
 unsigned char *choose_kernel_location(unsigned char *input,
 				      unsigned long input_size,
 				      unsigned char *output,
 				      unsigned long output_size)
 {
 	unsigned long choice = (unsigned long)output;
+	unsigned long random;
 
 	if (cmdline_find_option_bool("nokaslr")) {
 		debug_putstr("KASLR disabled...\n");
 		goto out;
 	}
 
-	/* XXX: choose random location. */
+	/* Record the various known unsafe memory ranges. */
+	mem_avoid_init((unsigned long)input, input_size,
+		       (unsigned long)output, output_size);
+
+	/* Walk e820 and find a random address. */
+	random = find_random_addr(choice, output_size);
+	if (!random) {
+		debug_putstr("KASLR could not find suitable E820 region...\n");
+		goto out;
+	}
+
+	/* Always enforce the minimum. */
+	if (random < choice)
+		goto out;
 
+	choice = random;
 out:
 	return (unsigned char *)choice;
 }
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 71387685dc16..196eaf373a06 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -112,14 +112,8 @@ struct boot_params *real_mode;		/* Pointer to real-mode data */
 void *memset(void *s, int c, size_t n);
 void *memcpy(void *dest, const void *src, size_t n);
 
-#ifdef CONFIG_X86_64
-#define memptr long
-#else
-#define memptr unsigned
-#endif
-
-static memptr free_mem_ptr;
-static memptr free_mem_end_ptr;
+memptr free_mem_ptr;
+memptr free_mem_end_ptr;
 
 static char *vidmem;
 static int vidport;
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 0782eb0b6e30..24e3e569a13c 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -23,7 +23,15 @@
 #define BOOT_BOOT_H
 #include "../ctype.h"
 
+#ifdef CONFIG_X86_64
+#define memptr long
+#else
+#define memptr unsigned
+#endif
+
 /* misc.c */
+extern memptr free_mem_ptr;
+extern memptr free_mem_end_ptr;
 extern struct boot_params *real_mode;		/* Pointer to real-mode data */
 void __putstr(const char *s);
 #define error_putstr(__x)  __putstr(__x)
-- 
cgit v1.2.3


From f32360ef6608434a032dc7ad262d45e9693c27f3 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 10 Oct 2013 17:18:17 -0700
Subject: x86, kaslr: Report kernel offset on panic

When the system panics, include the kernel offset in the report to assist
in debugging.

Signed-off-by: Kees Cook <keescook@chromium.org>
Link: http://lkml.kernel.org/r/1381450698-28710-6-git-send-email-keescook@chromium.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/setup.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f0de6294b955..1708862fc40d 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -823,6 +823,20 @@ static void __init trim_low_memory_range(void)
 	memblock_reserve(0, ALIGN(reserve_low, PAGE_SIZE));
 }
 	
+/*
+ * Dump out kernel offset information on panic.
+ */
+static int
+dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
+{
+	pr_emerg("Kernel Offset: 0x%lx from 0x%lx "
+		 "(relocation range: 0x%lx-0x%lx)\n",
+		 (unsigned long)&_text - __START_KERNEL, __START_KERNEL,
+		 __START_KERNEL_map, MODULES_VADDR-1);
+
+	return 0;
+}
+
 /*
  * Determine if we were loaded by an EFI loader.  If so, then we have also been
  * passed the efi memmap, systab, etc., so we should use these data structures
@@ -1242,3 +1256,15 @@ void __init i386_reserve_resources(void)
 }
 
 #endif /* CONFIG_X86_32 */
+
+static struct notifier_block kernel_offset_notifier = {
+	.notifier_call = dump_kernel_offset
+};
+
+static int __init register_kernel_offset_dumper(void)
+{
+	atomic_notifier_chain_register(&panic_notifier_list,
+					&kernel_offset_notifier);
+	return 0;
+}
+__initcall(register_kernel_offset_dumper);
-- 
cgit v1.2.3


From 6145cfe394a7f138f6b64491c5663f97dba12450 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 10 Oct 2013 17:18:18 -0700
Subject: x86, kaslr: Raise the maximum virtual address to -1 GiB on x86_64

On 64-bit, this raises the maximum location to -1 GiB (from -1.5 GiB),
the upper limit currently, since the kernel fixmap page mappings need
to be moved to use the other 1 GiB (which would be the theoretical
limit when building with -mcmodel=kernel).

Signed-off-by: Kees Cook <keescook@chromium.org>
Link: http://lkml.kernel.org/r/1381450698-28710-7-git-send-email-keescook@chromium.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/Kconfig                        | 16 +++++++++++++---
 arch/x86/include/asm/page_64_types.h    | 15 ++++++++++++---
 arch/x86/include/asm/pgtable_64_types.h |  2 +-
 arch/x86/mm/init_32.c                   |  3 +++
 4 files changed, 29 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 992701d4d4f8..51f439953d23 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1746,13 +1746,23 @@ config RANDOMIZE_BASE
 config RANDOMIZE_BASE_MAX_OFFSET
 	hex "Maximum ASLR offset allowed"
 	depends on RANDOMIZE_BASE
-	default "0x10000000"
-	range 0x0 0x10000000
+	range 0x0 0x20000000 if X86_32
+	default "0x20000000" if X86_32
+	range 0x0 0x40000000 if X86_64
+	default "0x40000000" if X86_64
 	---help---
 	 Determines the maximal offset in bytes that will be applied to the
 	 kernel when Address Space Layout Randomization (ASLR) is active.
 	 Must be less than or equal to the actual physical memory on the
-	 system. This must be a power of two.
+	 system. This must be a multiple of CONFIG_PHYSICAL_ALIGN.
+
+	 On 32-bit this is limited to 512MiB.
+
+	 On 64-bit this is limited by how the kernel fixmap page table is
+	 positioned, so this cannot be larger that 1GiB currently. Normally
+	 there is a 512MiB to 1.5GiB split between kernel and modules. When
+	 this is raised above the 512MiB default, the modules area will
+	 shrink to compensate, up to the current maximum 1GiB to 1GiB split.
 
 # Relocation on x86 needs some additional build support
 config X86_NEED_RELOCS
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 43dcd804ebd5..8de6d9cf3b95 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -39,9 +39,18 @@
 #define __VIRTUAL_MASK_SHIFT	47
 
 /*
- * Kernel image size is limited to 512 MB (see level2_kernel_pgt in
- * arch/x86/kernel/head_64.S), and it is mapped here:
+ * Kernel image size is limited to 1GiB due to the fixmap living in the
+ * next 1GiB (see level2_kernel_pgt in arch/x86/kernel/head_64.S). Use
+ * 512MiB by default, leaving 1.5GiB for modules once the page tables
+ * are fully set up. If kernel ASLR is configured, it can extend the
+ * kernel page table mapping, reducing the size of the modules area.
  */
-#define KERNEL_IMAGE_SIZE	(512 * 1024 * 1024)
+#define KERNEL_IMAGE_SIZE_DEFAULT      (512 * 1024 * 1024)
+#if defined(CONFIG_RANDOMIZE_BASE) && \
+	CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE_DEFAULT
+#define KERNEL_IMAGE_SIZE   CONFIG_RANDOMIZE_BASE_MAX_OFFSET
+#else
+#define KERNEL_IMAGE_SIZE      KERNEL_IMAGE_SIZE_DEFAULT
+#endif
 
 #endif /* _ASM_X86_PAGE_64_DEFS_H */
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 2d883440cb9a..c883bf726398 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -58,7 +58,7 @@ typedef struct { pteval_t pte; } pte_t;
 #define VMALLOC_START    _AC(0xffffc90000000000, UL)
 #define VMALLOC_END      _AC(0xffffe8ffffffffff, UL)
 #define VMEMMAP_START	 _AC(0xffffea0000000000, UL)
-#define MODULES_VADDR    _AC(0xffffffffa0000000, UL)
+#define MODULES_VADDR    (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
 #define MODULES_END      _AC(0xffffffffff000000, UL)
 #define MODULES_LEN   (MODULES_END - MODULES_VADDR)
 
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 4287f1ffba7e..5bdc5430597c 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -806,6 +806,9 @@ void __init mem_init(void)
 	BUILD_BUG_ON(VMALLOC_START			>= VMALLOC_END);
 #undef high_memory
 #undef __FIXADDR_TOP
+#ifdef CONFIG_RANDOMIZE_BASE
+	BUILD_BUG_ON(CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE);
+#endif
 
 #ifdef CONFIG_HIGHMEM
 	BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE	> FIXADDR_START);
-- 
cgit v1.2.3


From 6e6a4932b0f569b1a5bb4fcbf5dde1b1a42f01bb Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Sun, 13 Oct 2013 04:08:56 -0700
Subject: x86, boot: Rename get_flags() and check_flags() to *_cpuflags()

When a function is used in more than one file it may not be possible
to immediately tell from context what the intended meaning is.  As
such, it is more important that the naming be self-evident.  Thus,
change get_flags() to get_cpuflags().

For consistency, change check_flags() to check_cpuflags() even though
it is only used in cpucheck.c.

Link: http://lkml.kernel.org/r/1381450698-28710-2-git-send-email-keescook@chromium.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/boot/compressed/cpuflags.c |  2 +-
 arch/x86/boot/cpucheck.c            | 14 +++++++-------
 arch/x86/boot/cpuflags.c            |  2 +-
 arch/x86/boot/cpuflags.h            |  2 +-
 4 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/boot/compressed/cpuflags.c b/arch/x86/boot/compressed/cpuflags.c
index 931cba6a4bb0..aa313466118b 100644
--- a/arch/x86/boot/compressed/cpuflags.c
+++ b/arch/x86/boot/compressed/cpuflags.c
@@ -4,7 +4,7 @@
 
 bool has_cpuflag(int flag)
 {
-	get_flags();
+	get_cpuflags();
 
 	return test_bit(flag, cpu.flags);
 }
diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c
index e1f3c166a512..100a9a10076a 100644
--- a/arch/x86/boot/cpucheck.c
+++ b/arch/x86/boot/cpucheck.c
@@ -68,7 +68,7 @@ static int is_transmeta(void)
 }
 
 /* Returns a bitmask of which words we have error bits in */
-static int check_flags(void)
+static int check_cpuflags(void)
 {
 	u32 err;
 	int i;
@@ -101,8 +101,8 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
 	if (has_eflag(X86_EFLAGS_AC))
 		cpu.level = 4;
 
-	get_flags();
-	err = check_flags();
+	get_cpuflags();
+	err = check_cpuflags();
 
 	if (test_bit(X86_FEATURE_LM, cpu.flags))
 		cpu.level = 64;
@@ -121,8 +121,8 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
 		eax &= ~(1 << 15);
 		asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
 
-		get_flags();	/* Make sure it really did something */
-		err = check_flags();
+		get_cpuflags();	/* Make sure it really did something */
+		err = check_cpuflags();
 	} else if (err == 0x01 &&
 		   !(err_flags[0] & ~(1 << X86_FEATURE_CX8)) &&
 		   is_centaur() && cpu.model >= 6) {
@@ -137,7 +137,7 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
 		asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
 
 		set_bit(X86_FEATURE_CX8, cpu.flags);
-		err = check_flags();
+		err = check_cpuflags();
 	} else if (err == 0x01 && is_transmeta()) {
 		/* Transmeta might have masked feature bits in word 0 */
 
@@ -152,7 +152,7 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
 		    : : "ecx", "ebx");
 		asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
 
-		err = check_flags();
+		err = check_cpuflags();
 	}
 
 	if (err_flags_ptr)
diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c
index b02544a2bce0..a9fcb7cfb241 100644
--- a/arch/x86/boot/cpuflags.c
+++ b/arch/x86/boot/cpuflags.c
@@ -65,7 +65,7 @@ static inline void cpuid(u32 id, u32 *a, u32 *b, u32 *c, u32 *d)
 	);
 }
 
-void get_flags(void)
+void get_cpuflags(void)
 {
 	u32 max_intel_level, max_amd_level;
 	u32 tfms;
diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h
index 9bb4e25f7317..ea97697e51e4 100644
--- a/arch/x86/boot/cpuflags.h
+++ b/arch/x86/boot/cpuflags.h
@@ -14,6 +14,6 @@ extern struct cpu_features cpu;
 extern u32 cpu_vendor[3];
 
 int has_eflag(unsigned long mask);
-void get_flags(void);
+void get_cpuflags(void);
 
 #endif
-- 
cgit v1.2.3


From aec58bafaf89279522c44ec8ca9211eabb2b6976 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 15 Oct 2013 23:43:14 -0700
Subject: x86/relocs: Add percpu fixup for GNU ld 2.23

The GNU linker tries to put __per_cpu_load into the percpu area,
resulting in a lack of its relocation. Force this symbol to be
relocated. Seen starting with GNU ld 2.23 and later.

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Michael Davidson <md@google.com>
Cc: Cong Ding <dinggnu@gmail.com>
Link: http://lkml.kernel.org/r/20131016064314.GA2739@www.outflux.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/tools/relocs.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 71a2533c90d3..11f9285a2ff6 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -729,6 +729,7 @@ static void percpu_init(void)
  *
  * The GNU linker incorrectly associates:
  *	__init_begin
+ *	__per_cpu_load
  *
  * The "gold" linker incorrectly associates:
  *	init_per_cpu__irq_stack_union
@@ -738,6 +739,7 @@ static int is_percpu_sym(ElfW(Sym) *sym, const char *symname)
 {
 	return (sym->st_shndx == per_cpu_shndx) &&
 		strcmp(symname, "__init_begin") &&
+		strcmp(symname, "__per_cpu_load") &&
 		strncmp(symname, "init_per_cpu_", 13);
 }
 
-- 
cgit v1.2.3


From f4fccac05f7f6bacb8e481a84d175e85ffcf9fe2 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Thu, 31 Oct 2013 17:24:59 +0100
Subject: x86/efi: Simplify EFI_DEBUG

... and lose one #ifdef .. #endif sandwich.

Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/platform/efi/efi.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 1d3372ac9c66..f396163b0402 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -51,7 +51,7 @@
 #include <asm/x86_init.h>
 #include <asm/rtc.h>
 
-#define EFI_DEBUG	1
+#define EFI_DEBUG
 
 #define EFI_MIN_RESERVE 5120
 
@@ -398,9 +398,9 @@ int __init efi_memblock_x86_reserve_range(void)
 	return 0;
 }
 
-#if EFI_DEBUG
 static void __init print_efi_memmap(void)
 {
+#ifdef EFI_DEBUG
 	efi_memory_desc_t *md;
 	void *p;
 	int i;
@@ -415,8 +415,8 @@ static void __init print_efi_memmap(void)
 			md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
 			(md->num_pages >> (20 - EFI_PAGE_SHIFT)));
 	}
-}
 #endif  /*  EFI_DEBUG  */
+}
 
 void __init efi_reserve_boot_services(void)
 {
@@ -696,10 +696,7 @@ void __init efi_init(void)
 		x86_platform.set_wallclock = efi_set_rtc_mmss;
 	}
 #endif
-
-#if EFI_DEBUG
 	print_efi_memmap();
-#endif
 }
 
 void __init efi_late_init(void)
-- 
cgit v1.2.3


From 0fd64c23fdf556e9e68580cff03b3505797bbf53 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Thu, 31 Oct 2013 17:25:00 +0100
Subject: x86/mm/pageattr: Lookup address in an arbitrary PGD

This is preparatory work in order to be able to map pages into a
specified PGD and not implicitly and only into init_mm.

Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/mm/pageattr.c | 36 ++++++++++++++++++++++++++----------
 1 file changed, 26 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index bb32480c2d71..c53de62a1170 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -30,6 +30,7 @@
  */
 struct cpa_data {
 	unsigned long	*vaddr;
+	pgd_t		*pgd;
 	pgprot_t	mask_set;
 	pgprot_t	mask_clr;
 	int		numpages;
@@ -322,17 +323,9 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
 	return prot;
 }
 
-/*
- * Lookup the page table entry for a virtual address. Return a pointer
- * to the entry and the level of the mapping.
- *
- * Note: We return pud and pmd either when the entry is marked large
- * or when the present bit is not set. Otherwise we would return a
- * pointer to a nonexisting mapping.
- */
-pte_t *lookup_address(unsigned long address, unsigned int *level)
+static pte_t *__lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
+				      unsigned int *level)
 {
-	pgd_t *pgd = pgd_offset_k(address);
 	pud_t *pud;
 	pmd_t *pmd;
 
@@ -361,8 +354,31 @@ pte_t *lookup_address(unsigned long address, unsigned int *level)
 
 	return pte_offset_kernel(pmd, address);
 }
+
+/*
+ * Lookup the page table entry for a virtual address. Return a pointer
+ * to the entry and the level of the mapping.
+ *
+ * Note: We return pud and pmd either when the entry is marked large
+ * or when the present bit is not set. Otherwise we would return a
+ * pointer to a nonexisting mapping.
+ */
+pte_t *lookup_address(unsigned long address, unsigned int *level)
+{
+        return __lookup_address_in_pgd(pgd_offset_k(address), address, level);
+}
 EXPORT_SYMBOL_GPL(lookup_address);
 
+static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
+				  unsigned int *level)
+{
+        if (cpa->pgd)
+		return __lookup_address_in_pgd(cpa->pgd + pgd_index(address),
+					       address, level);
+
+        return lookup_address(address, level);
+}
+
 /*
  * This is necessary because __pa() does not work on some
  * kinds of memory, like vmalloc() or the alloc_remap()
-- 
cgit v1.2.3


From f3f729661e8db476ac427a97de015307aebb7404 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Thu, 31 Oct 2013 17:25:01 +0100
Subject: x86/mm/pageattr: Add a PGD pagetable populating function

This allocates, if necessary, and populates the corresponding PGD entry
with a PUD page. The next population level is a dummy macro which will
be removed by the next patch and it is added here to keep the patch
small and easily reviewable but not break bisection, at the same time.

Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/mm/pageattr.c | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index c53de62a1170..4b47ae0602e1 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -666,6 +666,45 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	return 0;
 }
 
+#define populate_pud(cpa, addr, pgd, pgprot)	(-1)
+
+/*
+ * Restrictions for kernel page table do not necessarily apply when mapping in
+ * an alternate PGD.
+ */
+static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
+{
+	pgprot_t pgprot = __pgprot(_KERNPG_TABLE);
+	bool allocd_pgd = false;
+	pgd_t *pgd_entry;
+	pud_t *pud = NULL;	/* shut up gcc */
+	int ret;
+
+	pgd_entry = cpa->pgd + pgd_index(addr);
+
+	/*
+	 * Allocate a PUD page and hand it down for mapping.
+	 */
+	if (pgd_none(*pgd_entry)) {
+		pud = (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
+		if (!pud)
+			return -1;
+
+		set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE));
+		allocd_pgd = true;
+	}
+
+	pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr);
+	pgprot_val(pgprot) |=  pgprot_val(cpa->mask_set);
+
+	ret = populate_pud(cpa, addr, pgd_entry, pgprot);
+	if (ret < 0)
+		return ret;
+
+	cpa->numpages = ret;
+	return 0;
+}
+
 static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
 			       int primary)
 {
-- 
cgit v1.2.3


From 4b23538d88c87d9c693ad87c8c808e92a505a6e6 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Thu, 31 Oct 2013 17:25:02 +0100
Subject: x86/mm/pageattr: Add a PUD pagetable populating function

Add the next level of the pagetable populating function, we handle
chunks around a 1G boundary by mapping them with the lower level
functions - otherwise we use 1G pages for the mappings, thus using as
less amount of pagetable pages as possible.

Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/mm/pageattr.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 86 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 4b47ae0602e1..81deca77b871 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -666,7 +666,92 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	return 0;
 }
 
-#define populate_pud(cpa, addr, pgd, pgprot)	(-1)
+static int alloc_pmd_page(pud_t *pud)
+{
+	pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
+	if (!pmd)
+		return -1;
+
+	set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+	return 0;
+}
+
+#define populate_pmd(cpa, start, end, pages, pud, pgprot)	(-1)
+
+static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
+			pgprot_t pgprot)
+{
+	pud_t *pud;
+	unsigned long end;
+	int cur_pages = 0;
+
+	end = start + (cpa->numpages << PAGE_SHIFT);
+
+	/*
+	 * Not on a Gb page boundary? => map everything up to it with
+	 * smaller pages.
+	 */
+	if (start & (PUD_SIZE - 1)) {
+		unsigned long pre_end;
+		unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
+
+		pre_end   = min_t(unsigned long, end, next_page);
+		cur_pages = (pre_end - start) >> PAGE_SHIFT;
+		cur_pages = min_t(int, (int)cpa->numpages, cur_pages);
+
+		pud = pud_offset(pgd, start);
+
+		/*
+		 * Need a PMD page?
+		 */
+		if (pud_none(*pud))
+			if (alloc_pmd_page(pud))
+				return -1;
+
+		cur_pages = populate_pmd(cpa, start, pre_end, cur_pages,
+					 pud, pgprot);
+		if (cur_pages < 0)
+			return cur_pages;
+
+		start = pre_end;
+	}
+
+	/* We mapped them all? */
+	if (cpa->numpages == cur_pages)
+		return cur_pages;
+
+	pud = pud_offset(pgd, start);
+
+	/*
+	 * Map everything starting from the Gb boundary, possibly with 1G pages
+	 */
+	while (end - start >= PUD_SIZE) {
+		set_pud(pud, __pud(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot)));
+
+		start	  += PUD_SIZE;
+		cpa->pfn  += PUD_SIZE;
+		cur_pages += PUD_SIZE >> PAGE_SHIFT;
+		pud++;
+	}
+
+	/* Map trailing leftover */
+	if (start < end) {
+		int tmp;
+
+		pud = pud_offset(pgd, start);
+		if (pud_none(*pud))
+			if (alloc_pmd_page(pud))
+				return -1;
+
+		tmp = populate_pmd(cpa, start, end, cpa->numpages - cur_pages,
+				   pud, pgprot);
+		if (tmp < 0)
+			return cur_pages;
+
+		cur_pages += tmp;
+	}
+	return cur_pages;
+}
 
 /*
  * Restrictions for kernel page table do not necessarily apply when mapping in
-- 
cgit v1.2.3


From f900a4b8ab0f462d89a9fcb6173cac1403415b16 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Thu, 31 Oct 2013 17:25:03 +0100
Subject: x86/mm/pageattr: Add a PMD pagetable populating function

Handle PMD-level mappings the same as PUD ones.

Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/mm/pageattr.c | 82 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 81 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 81deca77b871..968398b023c0 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -666,6 +666,16 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	return 0;
 }
 
+static int alloc_pte_page(pmd_t *pmd)
+{
+	pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
+	if (!pte)
+		return -1;
+
+	set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
+	return 0;
+}
+
 static int alloc_pmd_page(pud_t *pud)
 {
 	pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
@@ -676,7 +686,77 @@ static int alloc_pmd_page(pud_t *pud)
 	return 0;
 }
 
-#define populate_pmd(cpa, start, end, pages, pud, pgprot)	(-1)
+#define populate_pte(cpa, start, end, pages, pmd, pgprot)	do {} while (0)
+
+static int populate_pmd(struct cpa_data *cpa,
+			unsigned long start, unsigned long end,
+			unsigned num_pages, pud_t *pud, pgprot_t pgprot)
+{
+	unsigned int cur_pages = 0;
+	pmd_t *pmd;
+
+	/*
+	 * Not on a 2M boundary?
+	 */
+	if (start & (PMD_SIZE - 1)) {
+		unsigned long pre_end = start + (num_pages << PAGE_SHIFT);
+		unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
+
+		pre_end   = min_t(unsigned long, pre_end, next_page);
+		cur_pages = (pre_end - start) >> PAGE_SHIFT;
+		cur_pages = min_t(unsigned int, num_pages, cur_pages);
+
+		/*
+		 * Need a PTE page?
+		 */
+		pmd = pmd_offset(pud, start);
+		if (pmd_none(*pmd))
+			if (alloc_pte_page(pmd))
+				return -1;
+
+		populate_pte(cpa, start, pre_end, cur_pages, pmd, pgprot);
+
+		start = pre_end;
+	}
+
+	/*
+	 * We mapped them all?
+	 */
+	if (num_pages == cur_pages)
+		return cur_pages;
+
+	while (end - start >= PMD_SIZE) {
+
+		/*
+		 * We cannot use a 1G page so allocate a PMD page if needed.
+		 */
+		if (pud_none(*pud))
+			if (alloc_pmd_page(pud))
+				return -1;
+
+		pmd = pmd_offset(pud, start);
+
+		set_pmd(pmd, __pmd(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot)));
+
+		start	  += PMD_SIZE;
+		cpa->pfn  += PMD_SIZE;
+		cur_pages += PMD_SIZE >> PAGE_SHIFT;
+	}
+
+	/*
+	 * Map trailing 4K pages.
+	 */
+	if (start < end) {
+		pmd = pmd_offset(pud, start);
+		if (pmd_none(*pmd))
+			if (alloc_pte_page(pmd))
+				return -1;
+
+		populate_pte(cpa, start, end, num_pages - cur_pages,
+			     pmd, pgprot);
+	}
+	return num_pages;
+}
 
 static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
 			pgprot_t pgprot)
-- 
cgit v1.2.3


From c6b6f363f7b24aa448994e3a65c4d5b3116acfcc Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Thu, 31 Oct 2013 17:25:04 +0100
Subject: x86/mm/pageattr: Add a PTE pagetable populating function

Handle last level by unconditionally writing the PTEs into the PTE page
while paying attention to the NX bit.

Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/mm/pageattr.c | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 968398b023c0..2a1308a8c072 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -686,7 +686,27 @@ static int alloc_pmd_page(pud_t *pud)
 	return 0;
 }
 
-#define populate_pte(cpa, start, end, pages, pmd, pgprot)	do {} while (0)
+static void populate_pte(struct cpa_data *cpa,
+			 unsigned long start, unsigned long end,
+			 unsigned num_pages, pmd_t *pmd, pgprot_t pgprot)
+{
+	pte_t *pte;
+
+	pte = pte_offset_kernel(pmd, start);
+
+	while (num_pages-- && start < end) {
+
+		/* deal with the NX bit */
+		if (!(pgprot_val(pgprot) & _PAGE_NX))
+			cpa->pfn &= ~_PAGE_NX;
+
+		set_pte(pte, pfn_pte(cpa->pfn >> PAGE_SHIFT, pgprot));
+
+		start	 += PAGE_SIZE;
+		cpa->pfn += PAGE_SIZE;
+		pte++;
+	}
+}
 
 static int populate_pmd(struct cpa_data *cpa,
 			unsigned long start, unsigned long end,
-- 
cgit v1.2.3


From 0bb8aeee7b73b21e09d3ea12f2120d974f70b669 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Thu, 31 Oct 2013 17:25:05 +0100
Subject: x86/mm/pageattr: Add a PUD error unwinding path

In case we encounter an error during the mapping of a region, we want to
unwind what we've established so far exactly the way we did the mapping.
This is the PUD part kept deliberately small for easier review.

Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/mm/pageattr.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 58 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 2a1308a8c072..1cbdbbc35b47 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -666,6 +666,51 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	return 0;
 }
 
+#define unmap_pmd_range(pud, start, pre_end)		do {} while (0)
+
+static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
+{
+	pud_t *pud = pud_offset(pgd, start);
+
+	/*
+	 * Not on a GB page boundary?
+	 */
+	if (start & (PUD_SIZE - 1)) {
+		unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
+		unsigned long pre_end	= min_t(unsigned long, end, next_page);
+
+		unmap_pmd_range(pud, start, pre_end);
+
+		start = pre_end;
+		pud++;
+	}
+
+	/*
+	 * Try to unmap in 1G chunks?
+	 */
+	while (end - start >= PUD_SIZE) {
+
+		if (pud_large(*pud))
+			pud_clear(pud);
+		else
+			unmap_pmd_range(pud, start, start + PUD_SIZE);
+
+		start += PUD_SIZE;
+		pud++;
+	}
+
+	/*
+	 * 2M leftovers?
+	 */
+	if (start < end)
+		unmap_pmd_range(pud, start, end);
+
+	/*
+	 * No need to try to free the PUD page because we'll free it in
+	 * populate_pgd's error path
+	 */
+}
+
 static int alloc_pte_page(pmd_t *pmd)
 {
 	pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
@@ -883,9 +928,20 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
 	pgprot_val(pgprot) |=  pgprot_val(cpa->mask_set);
 
 	ret = populate_pud(cpa, addr, pgd_entry, pgprot);
-	if (ret < 0)
-		return ret;
+	if (ret < 0) {
+		unmap_pud_range(pgd_entry, addr,
+				addr + (cpa->numpages << PAGE_SHIFT));
 
+		if (allocd_pgd) {
+			/*
+			 * If I allocated this PUD page, I can just as well
+			 * free it in this error path.
+			 */
+			pgd_clear(pgd_entry);
+			free_page((unsigned long)pud);
+		}
+		return ret;
+	}
 	cpa->numpages = ret;
 	return 0;
 }
-- 
cgit v1.2.3


From 52a628fb454d13d944bb3c8a89a314cc3affa417 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Thu, 31 Oct 2013 17:25:06 +0100
Subject: x86/mm/pageattr: Add last levels of error path

We try to free the pagetable pages once we've unmapped our portion.

Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/mm/pageattr.c | 94 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 93 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 1cbdbbc35b47..db8ace29514f 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -666,7 +666,99 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	return 0;
 }
 
-#define unmap_pmd_range(pud, start, pre_end)		do {} while (0)
+static bool try_to_free_pte_page(pte_t *pte)
+{
+	int i;
+
+	for (i = 0; i < PTRS_PER_PTE; i++)
+		if (!pte_none(pte[i]))
+			return false;
+
+	free_page((unsigned long)pte);
+	return true;
+}
+
+static bool try_to_free_pmd_page(pmd_t *pmd)
+{
+	int i;
+
+	for (i = 0; i < PTRS_PER_PMD; i++)
+		if (!pmd_none(pmd[i]))
+			return false;
+
+	free_page((unsigned long)pmd);
+	return true;
+}
+
+static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
+{
+	pte_t *pte = pte_offset_kernel(pmd, start);
+
+	while (start < end) {
+		set_pte(pte, __pte(0));
+
+		start += PAGE_SIZE;
+		pte++;
+	}
+
+	if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) {
+		pmd_clear(pmd);
+		return true;
+	}
+	return false;
+}
+
+static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd,
+			      unsigned long start, unsigned long end)
+{
+	if (unmap_pte_range(pmd, start, end))
+		if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
+			pud_clear(pud);
+}
+
+static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
+{
+	pmd_t *pmd = pmd_offset(pud, start);
+
+	/*
+	 * Not on a 2MB page boundary?
+	 */
+	if (start & (PMD_SIZE - 1)) {
+		unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
+		unsigned long pre_end = min_t(unsigned long, end, next_page);
+
+		__unmap_pmd_range(pud, pmd, start, pre_end);
+
+		start = pre_end;
+		pmd++;
+	}
+
+	/*
+	 * Try to unmap in 2M chunks.
+	 */
+	while (end - start >= PMD_SIZE) {
+		if (pmd_large(*pmd))
+			pmd_clear(pmd);
+		else
+			__unmap_pmd_range(pud, pmd, start, start + PMD_SIZE);
+
+		start += PMD_SIZE;
+		pmd++;
+	}
+
+	/*
+	 * 4K leftovers?
+	 */
+	if (start < end)
+		return __unmap_pmd_range(pud, pmd, start, end);
+
+	/*
+	 * Try again to free the PMD page if haven't succeeded above.
+	 */
+	if (!pud_none(*pud))
+		if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
+			pud_clear(pud);
+}
 
 static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
 {
-- 
cgit v1.2.3


From 82f0712ca0f947170e785300b5c39d9c25e2f6ff Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Thu, 31 Oct 2013 17:25:07 +0100
Subject: x86/mm/cpa: Map in an arbitrary pgd

Add the ability to map pages in an arbitrary pgd. This wires in the
remaining stuff so that there's a new interface with which you can map a
region into an arbitrary PGD.

Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/mm/pageattr.c | 53 +++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 46 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index db8ace29514f..b3b19f46c016 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -453,7 +453,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	 * Check for races, another CPU might have split this page
 	 * up already:
 	 */
-	tmp = lookup_address(address, &level);
+	tmp = _lookup_address_cpa(cpa, address, &level);
 	if (tmp != kpte)
 		goto out_unlock;
 
@@ -559,7 +559,8 @@ out_unlock:
 }
 
 static int
-__split_large_page(pte_t *kpte, unsigned long address, struct page *base)
+__split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
+		   struct page *base)
 {
 	pte_t *pbase = (pte_t *)page_address(base);
 	unsigned long pfn, pfninc = 1;
@@ -572,7 +573,7 @@ __split_large_page(pte_t *kpte, unsigned long address, struct page *base)
 	 * Check for races, another CPU might have split this page
 	 * up for us already:
 	 */
-	tmp = lookup_address(address, &level);
+	tmp = _lookup_address_cpa(cpa, address, &level);
 	if (tmp != kpte) {
 		spin_unlock(&pgd_lock);
 		return 1;
@@ -648,7 +649,8 @@ __split_large_page(pte_t *kpte, unsigned long address, struct page *base)
 	return 0;
 }
 
-static int split_large_page(pte_t *kpte, unsigned long address)
+static int split_large_page(struct cpa_data *cpa, pte_t *kpte,
+			    unsigned long address)
 {
 	struct page *base;
 
@@ -660,7 +662,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	if (!base)
 		return -ENOMEM;
 
-	if (__split_large_page(kpte, address, base))
+	if (__split_large_page(cpa, kpte, address, base))
 		__free_page(base);
 
 	return 0;
@@ -1041,6 +1043,9 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
 static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
 			       int primary)
 {
+	if (cpa->pgd)
+		return populate_pgd(cpa, vaddr);
+
 	/*
 	 * Ignore all non primary paths.
 	 */
@@ -1085,7 +1090,7 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
 	else
 		address = *cpa->vaddr;
 repeat:
-	kpte = lookup_address(address, &level);
+	kpte = _lookup_address_cpa(cpa, address, &level);
 	if (!kpte)
 		return __cpa_process_fault(cpa, address, primary);
 
@@ -1149,7 +1154,7 @@ repeat:
 	/*
 	 * We have to split the large page:
 	 */
-	err = split_large_page(kpte, address);
+	err = split_large_page(cpa, kpte, address);
 	if (!err) {
 		/*
 	 	 * Do a global flush tlb after splitting the large page
@@ -1298,6 +1303,8 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 	int ret, cache, checkalias;
 	unsigned long baddr = 0;
 
+	memset(&cpa, 0, sizeof(cpa));
+
 	/*
 	 * Check, if we are requested to change a not supported
 	 * feature:
@@ -1744,6 +1751,7 @@ static int __set_pages_p(struct page *page, int numpages)
 {
 	unsigned long tempaddr = (unsigned long) page_address(page);
 	struct cpa_data cpa = { .vaddr = &tempaddr,
+				.pgd = NULL,
 				.numpages = numpages,
 				.mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
 				.mask_clr = __pgprot(0),
@@ -1762,6 +1770,7 @@ static int __set_pages_np(struct page *page, int numpages)
 {
 	unsigned long tempaddr = (unsigned long) page_address(page);
 	struct cpa_data cpa = { .vaddr = &tempaddr,
+				.pgd = NULL,
 				.numpages = numpages,
 				.mask_set = __pgprot(0),
 				.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
@@ -1822,6 +1831,36 @@ bool kernel_page_present(struct page *page)
 
 #endif /* CONFIG_DEBUG_PAGEALLOC */
 
+int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
+			    unsigned numpages, unsigned long page_flags)
+{
+	int retval = -EINVAL;
+
+	struct cpa_data cpa = {
+		.vaddr = &address,
+		.pfn = pfn,
+		.pgd = pgd,
+		.numpages = numpages,
+		.mask_set = __pgprot(0),
+		.mask_clr = __pgprot(0),
+		.flags = 0,
+	};
+
+	if (!(__supported_pte_mask & _PAGE_NX))
+		goto out;
+
+	if (!(page_flags & _PAGE_NX))
+		cpa.mask_clr = __pgprot(_PAGE_NX);
+
+	cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags);
+
+	retval = __change_page_attr_set_clr(&cpa, 0);
+	__flush_tlb_all();
+
+out:
+	return retval;
+}
+
 /*
  * The testcases use internal knowledge of the implementation that shouldn't
  * be exposed to the rest of the kernel. Include these directly here.
-- 
cgit v1.2.3


From d2f7cbe7b26a74dbbbf8f325b2a6fd01bc34032c Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Thu, 31 Oct 2013 17:25:08 +0100
Subject: x86/efi: Runtime services virtual mapping

We map the EFI regions needed for runtime services non-contiguously,
with preserved alignment on virtual addresses starting from -4G down
for a total max space of 64G. This way, we provide for stable runtime
services addresses across kernels so that a kexec'd kernel can still use
them.

Thus, they're mapped in a separate pagetable so that we don't pollute
the kernel namespace.

Add an efi= kernel command line parameter for passing miscellaneous
options and chicken bits from the command line.

While at it, add a chicken bit called "efi=old_map" which can be used as
a fallback to the old runtime services mapping method in case there's
some b0rkage with a particular EFI implementation (haha, it is hard to
hold up the sarcasm here...).

Also, add the UEFI RT VA space to Documentation/x86/x86_64/mm.txt.

Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 Documentation/kernel-parameters.txt  |   6 ++
 Documentation/x86/x86_64/mm.txt      |   7 +++
 arch/x86/include/asm/efi.h           |  64 ++++++++++++++------
 arch/x86/include/asm/pgtable_types.h |   3 +-
 arch/x86/platform/efi/efi.c          |  94 +++++++++++++++++++++---------
 arch/x86/platform/efi/efi_32.c       |   9 ++-
 arch/x86/platform/efi/efi_64.c       | 109 +++++++++++++++++++++++++++++++++++
 arch/x86/platform/efi/efi_stub_64.S  |  54 +++++++++++++++++
 include/linux/efi.h                  |   1 +
 9 files changed, 300 insertions(+), 47 deletions(-)

(limited to 'arch')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 7a0f202d482e..ed43e92b0e7e 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -835,6 +835,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	edd=		[EDD]
 			Format: {"off" | "on" | "skip[mbr]"}
 
+	efi=		[EFI]
+			Format: { "old_map" }
+			old_map [X86-64]: switch to the old ioremap-based EFI
+			runtime services mapping. 32-bit still uses this one by
+			default.
+
 	efi_no_storage_paranoia [EFI; X86]
 			Using this parameter you can use more than 50% of
 			your efi variable storage. Use this parameter only if
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index 881582f75c9c..c584a51add15 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -28,4 +28,11 @@ reference.
 Current X86-64 implementations only support 40 bits of address space,
 but we support up to 46 bits. This expands into MBZ space in the page tables.
 
+->trampoline_pgd:
+
+We map EFI runtime services in the aforementioned PGD in the virtual
+range of 64Gb (arbitrarily set, can be raised if needed)
+
+0xffffffef00000000 - 0xffffffff00000000
+
 -Andi Kleen, Jul 2004
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 65c6e6e3a552..89a05b0507b9 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -1,6 +1,24 @@
 #ifndef _ASM_X86_EFI_H
 #define _ASM_X86_EFI_H
 
+/*
+ * We map the EFI regions needed for runtime services non-contiguously,
+ * with preserved alignment on virtual addresses starting from -4G down
+ * for a total max space of 64G. This way, we provide for stable runtime
+ * services addresses across kernels so that a kexec'd kernel can still
+ * use them.
+ *
+ * This is the main reason why we're doing stable VA mappings for RT
+ * services.
+ *
+ * This flag is used in conjuction with a chicken bit called
+ * "efi=old_map" which can be used as a fallback to the old runtime
+ * services mapping method in case there's some b0rkage with a
+ * particular EFI implementation (haha, it is hard to hold up the
+ * sarcasm here...).
+ */
+#define EFI_OLD_MEMMAP		EFI_ARCH_1
+
 #ifdef CONFIG_X86_32
 
 #define EFI_LOADER_SIGNATURE	"EL32"
@@ -69,24 +87,31 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
 	efi_call6((f), (u64)(a1), (u64)(a2), (u64)(a3),		\
 		  (u64)(a4), (u64)(a5), (u64)(a6))
 
+#define _efi_call_virtX(x, f, ...)					\
+({									\
+	efi_status_t __s;						\
+									\
+	efi_sync_low_kernel_mappings();					\
+	preempt_disable();						\
+	__s = efi_call##x((void *)efi.systab->runtime->f, __VA_ARGS__);	\
+	preempt_enable();						\
+	__s;								\
+})
+
 #define efi_call_virt0(f)				\
-	efi_call0((efi.systab->runtime->f))
-#define efi_call_virt1(f, a1)					\
-	efi_call1((efi.systab->runtime->f), (u64)(a1))
-#define efi_call_virt2(f, a1, a2)					\
-	efi_call2((efi.systab->runtime->f), (u64)(a1), (u64)(a2))
-#define efi_call_virt3(f, a1, a2, a3)					\
-	efi_call3((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
-		  (u64)(a3))
-#define efi_call_virt4(f, a1, a2, a3, a4)				\
-	efi_call4((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
-		  (u64)(a3), (u64)(a4))
-#define efi_call_virt5(f, a1, a2, a3, a4, a5)				\
-	efi_call5((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
-		  (u64)(a3), (u64)(a4), (u64)(a5))
-#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6)			\
-	efi_call6((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
-		  (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
+	_efi_call_virtX(0, f)
+#define efi_call_virt1(f, a1)				\
+	_efi_call_virtX(1, f, (u64)(a1))
+#define efi_call_virt2(f, a1, a2)			\
+	_efi_call_virtX(2, f, (u64)(a1), (u64)(a2))
+#define efi_call_virt3(f, a1, a2, a3)			\
+	_efi_call_virtX(3, f, (u64)(a1), (u64)(a2), (u64)(a3))
+#define efi_call_virt4(f, a1, a2, a3, a4)		\
+	_efi_call_virtX(4, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4))
+#define efi_call_virt5(f, a1, a2, a3, a4, a5)		\
+	_efi_call_virtX(5, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5))
+#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6)	\
+	_efi_call_virtX(6, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
 
 extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
 				 u32 type, u64 attribute);
@@ -95,12 +120,17 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
 
 extern int add_efi_memmap;
 extern unsigned long x86_efi_facility;
+extern struct efi_scratch efi_scratch;
 extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
 extern int efi_memblock_x86_reserve_range(void);
 extern void efi_call_phys_prelog(void);
 extern void efi_call_phys_epilog(void);
 extern void efi_unmap_memmap(void);
 extern void efi_memory_uc(u64 addr, unsigned long size);
+extern void __init efi_map_region(efi_memory_desc_t *md);
+extern void efi_sync_low_kernel_mappings(void);
+extern void efi_setup_page_tables(void);
+extern void __init old_map_region(efi_memory_desc_t *md);
 
 #ifdef CONFIG_EFI
 
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index f4843e031131..028e28b6fc2c 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -379,7 +379,8 @@ static inline void update_page_count(int level, unsigned long pages) { }
  */
 extern pte_t *lookup_address(unsigned long address, unsigned int *level);
 extern phys_addr_t slow_virt_to_phys(void *__address);
-
+extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
+				   unsigned numpages, unsigned long page_flags);
 #endif	/* !__ASSEMBLY__ */
 
 #endif /* _ASM_X86_PGTABLE_DEFS_H */
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index f396163b0402..b453069236fd 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -12,6 +12,8 @@
  *	Bibo Mao <bibo.mao@intel.com>
  *	Chandramouli Narayanan <mouli@linux.intel.com>
  *	Huang Ying <ying.huang@intel.com>
+ * Copyright (C) 2013 SuSE Labs
+ *	Borislav Petkov <bp@suse.de> - runtime services VA mapping
  *
  * Copied from efi_32.c to eliminate the duplicated code between EFI
  * 32/64 support code. --ying 2007-10-26
@@ -745,21 +747,56 @@ void efi_memory_uc(u64 addr, unsigned long size)
 	set_memory_uc(addr, npages);
 }
 
+void __init old_map_region(efi_memory_desc_t *md)
+{
+	u64 start_pfn, end_pfn, end;
+	unsigned long size;
+	void *va;
+
+	start_pfn = PFN_DOWN(md->phys_addr);
+	size	  = md->num_pages << PAGE_SHIFT;
+	end	  = md->phys_addr + size;
+	end_pfn   = PFN_UP(end);
+
+	if (pfn_range_is_mapped(start_pfn, end_pfn)) {
+		va = __va(md->phys_addr);
+
+		if (!(md->attribute & EFI_MEMORY_WB))
+			efi_memory_uc((u64)(unsigned long)va, size);
+	} else
+		va = efi_ioremap(md->phys_addr, size,
+				 md->type, md->attribute);
+
+	md->virt_addr = (u64) (unsigned long) va;
+	if (!va)
+		pr_err("ioremap of 0x%llX failed!\n",
+		       (unsigned long long)md->phys_addr);
+}
+
 /*
  * This function will switch the EFI runtime services to virtual mode.
- * Essentially, look through the EFI memmap and map every region that
- * has the runtime attribute bit set in its memory descriptor and update
- * that memory descriptor with the virtual address obtained from ioremap().
- * This enables the runtime services to be called without having to
+ * Essentially, we look through the EFI memmap and map every region that
+ * has the runtime attribute bit set in its memory descriptor into the
+ * ->trampoline_pgd page table using a top-down VA allocation scheme.
+ *
+ * The old method which used to update that memory descriptor with the
+ * virtual address obtained from ioremap() is still supported when the
+ * kernel is booted with efi=old_map on its command line. Same old
+ * method enabled the runtime services to be called without having to
  * thunk back into physical mode for every invocation.
+ *
+ * The new method does a pagetable switch in a preemption-safe manner
+ * so that we're in a different address space when calling a runtime
+ * function. For function arguments passing we do copy the PGDs of the
+ * kernel page table into ->trampoline_pgd prior to each call.
  */
 void __init efi_enter_virtual_mode(void)
 {
 	efi_memory_desc_t *md, *prev_md = NULL;
-	efi_status_t status;
+	void *p, *new_memmap = NULL;
 	unsigned long size;
-	u64 end, systab, start_pfn, end_pfn;
-	void *p, *va, *new_memmap = NULL;
+	efi_status_t status;
+	u64 end, systab;
 	int count = 0;
 
 	efi.systab = NULL;
@@ -768,7 +805,6 @@ void __init efi_enter_virtual_mode(void)
 	 * We don't do virtual mode, since we don't do runtime services, on
 	 * non-native EFI
 	 */
-
 	if (!efi_is_native()) {
 		efi_unmap_memmap();
 		return;
@@ -799,6 +835,7 @@ void __init efi_enter_virtual_mode(void)
 			continue;
 		}
 		prev_md = md;
+
 	}
 
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
@@ -808,33 +845,18 @@ void __init efi_enter_virtual_mode(void)
 		    md->type != EFI_BOOT_SERVICES_DATA)
 			continue;
 
+		efi_map_region(md);
+
 		size = md->num_pages << EFI_PAGE_SHIFT;
 		end = md->phys_addr + size;
 
-		start_pfn = PFN_DOWN(md->phys_addr);
-		end_pfn = PFN_UP(end);
-		if (pfn_range_is_mapped(start_pfn, end_pfn)) {
-			va = __va(md->phys_addr);
-
-			if (!(md->attribute & EFI_MEMORY_WB))
-				efi_memory_uc((u64)(unsigned long)va, size);
-		} else
-			va = efi_ioremap(md->phys_addr, size,
-					 md->type, md->attribute);
-
-		md->virt_addr = (u64) (unsigned long) va;
-
-		if (!va) {
-			pr_err("ioremap of 0x%llX failed!\n",
-			       (unsigned long long)md->phys_addr);
-			continue;
-		}
-
 		systab = (u64) (unsigned long) efi_phys.systab;
 		if (md->phys_addr <= systab && systab < end) {
 			systab += md->virt_addr - md->phys_addr;
+
 			efi.systab = (efi_system_table_t *) (unsigned long) systab;
 		}
+
 		new_memmap = krealloc(new_memmap,
 				      (count + 1) * memmap.desc_size,
 				      GFP_KERNEL);
@@ -845,6 +867,9 @@ void __init efi_enter_virtual_mode(void)
 
 	BUG_ON(!efi.systab);
 
+	efi_setup_page_tables();
+	efi_sync_low_kernel_mappings();
+
 	status = phys_efi_set_virtual_address_map(
 		memmap.desc_size * count,
 		memmap.desc_size,
@@ -877,7 +902,8 @@ void __init efi_enter_virtual_mode(void)
 	efi.query_variable_info = virt_efi_query_variable_info;
 	efi.update_capsule = virt_efi_update_capsule;
 	efi.query_capsule_caps = virt_efi_query_capsule_caps;
-	if (__supported_pte_mask & _PAGE_NX)
+
+	if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX))
 		runtime_code_page_mkexec();
 
 	kfree(new_memmap);
@@ -1007,3 +1033,15 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size)
 	return EFI_SUCCESS;
 }
 EXPORT_SYMBOL_GPL(efi_query_variable_store);
+
+static int __init parse_efi_cmdline(char *str)
+{
+	if (*str == '=')
+		str++;
+
+	if (!strncmp(str, "old_map", 7))
+		set_bit(EFI_OLD_MEMMAP, &x86_efi_facility);
+
+	return 0;
+}
+early_param("efi", parse_efi_cmdline);
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 40e446941dd7..e94557cf5487 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -37,9 +37,16 @@
  * claim EFI runtime service handler exclusively and to duplicate a memory in
  * low memory space say 0 - 3G.
  */
-
 static unsigned long efi_rt_eflags;
 
+void efi_sync_low_kernel_mappings(void) {}
+void efi_setup_page_tables(void) {}
+
+void __init efi_map_region(efi_memory_desc_t *md)
+{
+	old_map_region(md);
+}
+
 void efi_call_phys_prelog(void)
 {
 	struct desc_ptr gdt_descr;
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 39a0e7f1f0a3..bf286c386d33 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -38,10 +38,28 @@
 #include <asm/efi.h>
 #include <asm/cacheflush.h>
 #include <asm/fixmap.h>
+#include <asm/realmode.h>
 
 static pgd_t *save_pgd __initdata;
 static unsigned long efi_flags __initdata;
 
+/*
+ * We allocate runtime services regions bottom-up, starting from -4G, i.e.
+ * 0xffff_ffff_0000_0000 and limit EFI VA mapping space to 64G.
+ */
+static u64 efi_va	= -4 * (1UL << 30);
+#define EFI_VA_END	(-68 * (1UL << 30))
+
+/*
+ * Scratch space used for switching the pagetable in the EFI stub
+ */
+struct efi_scratch {
+	u64 r15;
+	u64 prev_cr3;
+	pgd_t *efi_pgt;
+	bool use_pgd;
+};
+
 static void __init early_code_mapping_set_exec(int executable)
 {
 	efi_memory_desc_t *md;
@@ -65,6 +83,9 @@ void __init efi_call_phys_prelog(void)
 	int pgd;
 	int n_pgds;
 
+	if (!efi_enabled(EFI_OLD_MEMMAP))
+		return;
+
 	early_code_mapping_set_exec(1);
 	local_irq_save(efi_flags);
 
@@ -86,6 +107,10 @@ void __init efi_call_phys_epilog(void)
 	 */
 	int pgd;
 	int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
+
+	if (!efi_enabled(EFI_OLD_MEMMAP))
+		return;
+
 	for (pgd = 0; pgd < n_pgds; pgd++)
 		set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]);
 	kfree(save_pgd);
@@ -94,6 +119,90 @@ void __init efi_call_phys_epilog(void)
 	early_code_mapping_set_exec(0);
 }
 
+/*
+ * Add low kernel mappings for passing arguments to EFI functions.
+ */
+void efi_sync_low_kernel_mappings(void)
+{
+	unsigned num_pgds;
+	pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
+
+	if (efi_enabled(EFI_OLD_MEMMAP))
+		return;
+
+	num_pgds = pgd_index(MODULES_END - 1) - pgd_index(PAGE_OFFSET);
+
+	memcpy(pgd + pgd_index(PAGE_OFFSET),
+		init_mm.pgd + pgd_index(PAGE_OFFSET),
+		sizeof(pgd_t) * num_pgds);
+}
+
+void efi_setup_page_tables(void)
+{
+	efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd;
+
+	if (!efi_enabled(EFI_OLD_MEMMAP))
+		efi_scratch.use_pgd = true;
+}
+
+static void __init __map_region(efi_memory_desc_t *md, u64 va)
+{
+	pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
+	unsigned long pf = 0, size;
+	u64 end;
+
+	if (!(md->attribute & EFI_MEMORY_WB))
+		pf |= _PAGE_PCD;
+
+	size = md->num_pages << PAGE_SHIFT;
+	end  = va + size;
+
+	if (kernel_map_pages_in_pgd(pgd, md->phys_addr, va, md->num_pages, pf))
+		pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n",
+			   md->phys_addr, va);
+}
+
+void __init efi_map_region(efi_memory_desc_t *md)
+{
+	unsigned long size = md->num_pages << PAGE_SHIFT;
+	u64 pa = md->phys_addr;
+
+	if (efi_enabled(EFI_OLD_MEMMAP))
+		return old_map_region(md);
+
+	/*
+	 * Make sure the 1:1 mappings are present as a catch-all for b0rked
+	 * firmware which doesn't update all internal pointers after switching
+	 * to virtual mode and would otherwise crap on us.
+	 */
+	__map_region(md, md->phys_addr);
+
+	efi_va -= size;
+
+	/* Is PA 2M-aligned? */
+	if (!(pa & (PMD_SIZE - 1))) {
+		efi_va &= PMD_MASK;
+	} else {
+		u64 pa_offset = pa & (PMD_SIZE - 1);
+		u64 prev_va = efi_va;
+
+		/* get us the same offset within this 2M page */
+		efi_va = (efi_va & PMD_MASK) + pa_offset;
+
+		if (efi_va > prev_va)
+			efi_va -= PMD_SIZE;
+	}
+
+	if (efi_va < EFI_VA_END) {
+		pr_warn(FW_WARN "VA address range overflow!\n");
+		return;
+	}
+
+	/* Do the VA map */
+	__map_region(md, efi_va);
+	md->virt_addr = efi_va;
+}
+
 void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
 				 u32 type, u64 attribute)
 {
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S
index 4c07ccab8146..88073b140298 100644
--- a/arch/x86/platform/efi/efi_stub_64.S
+++ b/arch/x86/platform/efi/efi_stub_64.S
@@ -34,10 +34,47 @@
 	mov %rsi, %cr0;			\
 	mov (%rsp), %rsp
 
+	/* stolen from gcc */
+	.macro FLUSH_TLB_ALL
+	movq %r15, efi_scratch(%rip)
+	movq %r14, efi_scratch+8(%rip)
+	movq %cr4, %r15
+	movq %r15, %r14
+	andb $0x7f, %r14b
+	movq %r14, %cr4
+	movq %r15, %cr4
+	movq efi_scratch+8(%rip), %r14
+	movq efi_scratch(%rip), %r15
+	.endm
+
+	.macro SWITCH_PGT
+	cmpb $0, efi_scratch+24(%rip)
+	je 1f
+	movq %r15, efi_scratch(%rip)		# r15
+	# save previous CR3
+	movq %cr3, %r15
+	movq %r15, efi_scratch+8(%rip)		# prev_cr3
+	movq efi_scratch+16(%rip), %r15		# EFI pgt
+	movq %r15, %cr3
+	1:
+	.endm
+
+	.macro RESTORE_PGT
+	cmpb $0, efi_scratch+24(%rip)
+	je 2f
+	movq efi_scratch+8(%rip), %r15
+	movq %r15, %cr3
+	movq efi_scratch(%rip), %r15
+	FLUSH_TLB_ALL
+	2:
+	.endm
+
 ENTRY(efi_call0)
 	SAVE_XMM
 	subq $32, %rsp
+	SWITCH_PGT
 	call *%rdi
+	RESTORE_PGT
 	addq $32, %rsp
 	RESTORE_XMM
 	ret
@@ -47,7 +84,9 @@ ENTRY(efi_call1)
 	SAVE_XMM
 	subq $32, %rsp
 	mov  %rsi, %rcx
+	SWITCH_PGT
 	call *%rdi
+	RESTORE_PGT
 	addq $32, %rsp
 	RESTORE_XMM
 	ret
@@ -57,7 +96,9 @@ ENTRY(efi_call2)
 	SAVE_XMM
 	subq $32, %rsp
 	mov  %rsi, %rcx
+	SWITCH_PGT
 	call *%rdi
+	RESTORE_PGT
 	addq $32, %rsp
 	RESTORE_XMM
 	ret
@@ -68,7 +109,9 @@ ENTRY(efi_call3)
 	subq $32, %rsp
 	mov  %rcx, %r8
 	mov  %rsi, %rcx
+	SWITCH_PGT
 	call *%rdi
+	RESTORE_PGT
 	addq $32, %rsp
 	RESTORE_XMM
 	ret
@@ -80,7 +123,9 @@ ENTRY(efi_call4)
 	mov %r8, %r9
 	mov %rcx, %r8
 	mov %rsi, %rcx
+	SWITCH_PGT
 	call *%rdi
+	RESTORE_PGT
 	addq $32, %rsp
 	RESTORE_XMM
 	ret
@@ -93,7 +138,9 @@ ENTRY(efi_call5)
 	mov %r8, %r9
 	mov %rcx, %r8
 	mov %rsi, %rcx
+	SWITCH_PGT
 	call *%rdi
+	RESTORE_PGT
 	addq $48, %rsp
 	RESTORE_XMM
 	ret
@@ -109,8 +156,15 @@ ENTRY(efi_call6)
 	mov %r8, %r9
 	mov %rcx, %r8
 	mov %rsi, %rcx
+	SWITCH_PGT
 	call *%rdi
+	RESTORE_PGT
 	addq $48, %rsp
 	RESTORE_XMM
 	ret
 ENDPROC(efi_call6)
+
+	.data
+ENTRY(efi_scratch)
+	.fill 3,8,0
+	.byte 0
diff --git a/include/linux/efi.h b/include/linux/efi.h
index bc5687d0f315..6c0ca528300c 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -653,6 +653,7 @@ extern int __init efi_setup_pcdp_console(char *);
 #define EFI_RUNTIME_SERVICES	3	/* Can we use runtime services? */
 #define EFI_MEMMAP		4	/* Can we use EFI memory map? */
 #define EFI_64BIT		5	/* Is the firmware 64-bit? */
+#define EFI_ARCH_1		6	/* First arch-specific bit */
 
 #ifdef CONFIG_EFI
 # ifdef CONFIG_X86
-- 
cgit v1.2.3


From ee41143027706d9f342dfe05487a00b20887fde7 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Thu, 31 Oct 2013 17:25:09 +0100
Subject: x86/efi: Check krealloc return value

Check it just in case. We might just as well panic there because runtime
won't be functioning anyway.

Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/platform/efi/efi.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index b453069236fd..3fac4dee492f 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -860,6 +860,9 @@ void __init efi_enter_virtual_mode(void)
 		new_memmap = krealloc(new_memmap,
 				      (count + 1) * memmap.desc_size,
 				      GFP_KERNEL);
+		if (!new_memmap)
+			goto err_out;
+
 		memcpy(new_memmap + (count * memmap.desc_size), md,
 		       memmap.desc_size);
 		count++;
@@ -914,6 +917,11 @@ void __init efi_enter_virtual_mode(void)
 			 EFI_VARIABLE_BOOTSERVICE_ACCESS |
 			 EFI_VARIABLE_RUNTIME_ACCESS,
 			 0, NULL);
+
+	return;
+
+ err_out:
+	pr_err("Error reallocating memory, EFI runtime non-functional!\n");
 }
 
 /*
-- 
cgit v1.2.3


From a653f3563c51c7bb7de63d607bef09d3baddaeb8 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 11 Nov 2013 14:28:39 -0800
Subject: x86, kaslr: Mix entropy sources together as needed

Depending on availability, mix the RDRAND and RDTSC entropy together with
XOR. Only when neither is available should the i8254 be used. Update
the Kconfig documentation to reflect this. Additionally, since bits
used for entropy is masked elsewhere, drop the needless masking in
the get_random_long(). Similarly, use the entire TSC, not just the low
32 bits.

Finally, to improve the starting entropy, do a simple hashing of a
build-time versions string and the boot-time boot_params structure for
some additional level of unpredictability.

Signed-off-by: Kees Cook <keescook@chromium.org>
Link: http://lkml.kernel.org/r/20131111222839.GA28616@www.outflux.net
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/Kconfig                | 14 +++++---
 arch/x86/boot/compressed/aslr.c | 73 +++++++++++++++++++++++++++++++----------
 2 files changed, 65 insertions(+), 22 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 51f439953d23..596cd9edeb9c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1735,13 +1735,17 @@ config RANDOMIZE_BASE
 	   deters exploit attempts relying on knowledge of the location
 	   of kernel internals.
 
-	   Entropy is generated using the RDRAND instruction if it
-	   is supported.  If not, then RDTSC is used, if supported. If
-	   neither RDRAND nor RDTSC are supported, then no randomness
-	   is introduced.
+	   Entropy is generated using the RDRAND instruction if it is
+	   supported. If RDTSC is supported, it is used as well. If
+	   neither RDRAND nor RDTSC are supported, then randomness is
+	   read from the i8254 timer.
 
 	   The kernel will be offset by up to RANDOMIZE_BASE_MAX_OFFSET,
-	   and aligned according to PHYSICAL_ALIGN.
+	   and aligned according to PHYSICAL_ALIGN. Since the kernel is
+	   built using 2GiB addressing, and PHYSICAL_ALGIN must be at a
+	   minimum of 2MiB, only 10 bits of entropy is theoretically
+	   possible. At best, due to page table layouts, 64-bit can use
+	   9 bits of entropy and 32-bit uses 8 bits.
 
 config RANDOMIZE_BASE_MAX_OFFSET
 	hex "Maximum ASLR offset allowed"
diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index 05957986d123..8746487fa916 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -5,6 +5,17 @@
 #include <asm/archrandom.h>
 #include <asm/e820.h>
 
+#include <generated/compile.h>
+#include <linux/module.h>
+#include <linux/uts.h>
+#include <linux/utsname.h>
+#include <generated/utsrelease.h>
+#include <linux/version.h>
+
+/* Simplified build-specific string for starting entropy. */
+static const char *build_str = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
+		LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
+
 #define I8254_PORT_CONTROL	0x43
 #define I8254_PORT_COUNTER0	0x40
 #define I8254_CMD_READBACK	0xC0
@@ -25,34 +36,62 @@ static inline u16 i8254(void)
 	return timer;
 }
 
+static unsigned long rotate_xor(unsigned long hash, const void *area,
+				size_t size)
+{
+	size_t i;
+	unsigned long *ptr = (unsigned long *)area;
+
+	for (i = 0; i < size / sizeof(hash); i++) {
+		/* Rotate by odd number of bits and XOR. */
+		hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
+		hash ^= ptr[i];
+	}
+
+	return hash;
+}
+
+/* Attempt to create a simple but unpredictable starting entropy. */
+static unsigned long get_random_boot(void)
+{
+	unsigned long hash = 0;
+
+	hash = rotate_xor(hash, build_str, sizeof(build_str));
+	hash = rotate_xor(hash, real_mode, sizeof(*real_mode));
+
+	return hash;
+}
+
 static unsigned long get_random_long(void)
 {
-	unsigned long random;
+	unsigned long raw, random = get_random_boot();
+	bool use_i8254 = true;
+
+	debug_putstr("KASLR using");
 
 	if (has_cpuflag(X86_FEATURE_RDRAND)) {
-		debug_putstr("KASLR using RDRAND...\n");
-		if (rdrand_long(&random))
-			return random;
+		debug_putstr(" RDRAND");
+		if (rdrand_long(&raw)) {
+			random ^= raw;
+			use_i8254 = false;
+		}
 	}
 
 	if (has_cpuflag(X86_FEATURE_TSC)) {
-		uint32_t raw;
+		debug_putstr(" RDTSC");
+		rdtscll(raw);
 
-		debug_putstr("KASLR using RDTSC...\n");
-		rdtscl(raw);
+		random ^= raw;
+		use_i8254 = false;
+	}
 
-		/* Only use the low bits of rdtsc. */
-		random = raw & 0xffff;
-	} else {
-		debug_putstr("KASLR using i8254...\n");
-		random = i8254();
+	if (use_i8254) {
+		debug_putstr(" i8254");
+		random ^= i8254();
 	}
 
-	/* Extend timer bits poorly... */
-	random |= (random << 16);
-#ifdef CONFIG_X86_64
-	random |= (random << 32);
-#endif
+	debug_putstr("...\n");
+
 	return random;
 }
 
-- 
cgit v1.2.3


From e8236c4d9338d52d0f2fcecc0b792ac0542e4ee9 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 11 Nov 2013 22:45:20 -0800
Subject: x86, kaslr: Add a circular multiply for better bit diffusion

If we don't have RDRAND (in which case nothing else *should* matter),
most sources have a highly biased entropy distribution.  Use a
circular multiply to diffuse the entropic bits.  A circular multiply
is a good operation for this: it is cheap on standard hardware and
because it is symmetric (unlike an ordinary multiply) it doesn't
introduce its own bias.

Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Link: http://lkml.kernel.org/r/20131111222839.GA28616@www.outflux.net
---
 arch/x86/boot/compressed/aslr.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index 8746487fa916..38a07cc4fbac 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -64,6 +64,11 @@ static unsigned long get_random_boot(void)
 
 static unsigned long get_random_long(void)
 {
+#ifdef CONFIG_X86_64
+	const unsigned long mix_const = 0x5d6008cbf3848dd3UL;
+#else
+	const unsigned long mix_const = 0x3f39e593UL;
+#endif
 	unsigned long raw, random = get_random_boot();
 	bool use_i8254 = true;
 
@@ -90,6 +95,12 @@ static unsigned long get_random_long(void)
 		random ^= i8254();
 	}
 
+	/* Circular multiply for better bit diffusion */
+	asm("mul %3"
+	    : "=a" (random), "=d" (raw)
+	    : "a" (random), "rm" (mix_const));
+	random += raw;
+
 	debug_putstr("...\n");
 
 	return random;
-- 
cgit v1.2.3


From 327f7d72454aecdc7a4a1c847a291a3f224b730f Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 12 Nov 2013 08:56:07 -0800
Subject: x86, kaslr: Use char array to gain sizeof sanity

The build_str needs to be char [] not char * for the sizeof() to report
the string length.

Reported-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: http://lkml.kernel.org/r/20131112165607.GA5921@www.outflux.net
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/boot/compressed/aslr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index 38a07cc4fbac..84be1752dcd8 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -13,7 +13,7 @@
 #include <linux/version.h>
 
 /* Simplified build-specific string for starting entropy. */
-static const char *build_str = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
+static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
 		LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
 
 #define I8254_PORT_CONTROL	0x43
-- 
cgit v1.2.3


From f4cb1cc18f364d761d5614eb6293cccc6647f259 Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Sat, 16 Nov 2013 12:37:01 -0800
Subject: x86-64, copy_user: Remove zero byte check before copy user buffer.

Operation of rep movsb instruction handles zero byte copy. As pointed out by
Linus, there is no need to check zero size in kernel. Removing this redundant
check saves a few cycles in copy user functions.

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Link: http://lkml.kernel.org/r/1384634221-6006-1-git-send-email-fenghua.yu@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/lib/copy_user_64.S | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index a30ca15be21c..ffe4eb9f09eb 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -236,8 +236,6 @@ ENDPROC(copy_user_generic_unrolled)
 ENTRY(copy_user_generic_string)
 	CFI_STARTPROC
 	ASM_STAC
-	andl %edx,%edx
-	jz 4f
 	cmpl $8,%edx
 	jb 2f		/* less than 8 bytes, go to byte copy loop */
 	ALIGN_DESTINATION
@@ -249,7 +247,7 @@ ENTRY(copy_user_generic_string)
 2:	movl %edx,%ecx
 3:	rep
 	movsb
-4:	xorl %eax,%eax
+	xorl %eax,%eax
 	ASM_CLAC
 	ret
 
@@ -279,12 +277,10 @@ ENDPROC(copy_user_generic_string)
 ENTRY(copy_user_enhanced_fast_string)
 	CFI_STARTPROC
 	ASM_STAC
-	andl %edx,%edx
-	jz 2f
 	movl %edx,%ecx
 1:	rep
 	movsb
-2:	xorl %eax,%eax
+	xorl %eax,%eax
 	ASM_CLAC
 	ret
 
-- 
cgit v1.2.3


From 5305ca10e7f44333e46c1ce57fb87306cb437891 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Fri, 15 Nov 2013 14:14:00 -0800
Subject: x86/mm: Unify pte_to_pgoff() and pgoff_to_pte() helpers

Use unified pte_bitop() helper to manipulate bits in pte/pgoff
bitfields and convert pte_to_pgoff()/pgoff_to_pte() to inlines.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/pgtable-2level.h | 100 ++++++++++++++++++++--------------
 1 file changed, 59 insertions(+), 41 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index 3bf2dd0cf61f..0d193e234647 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -55,6 +55,13 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
 #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
 #endif
 
+/* Bit manipulation helper on pte/pgoff entry */
+static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshift,
+				      unsigned long mask, unsigned int leftshift)
+{
+	return ((value >> rightshift) & mask) << leftshift;
+}
+
 #ifdef CONFIG_MEM_SOFT_DIRTY
 
 /*
@@ -71,31 +78,34 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
 #define PTE_FILE_BITS2		(PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1)
 #define PTE_FILE_BITS3		(PTE_FILE_SHIFT4 - PTE_FILE_SHIFT3 - 1)
 
-#define pte_to_pgoff(pte)						\
-	((((pte).pte_low >> (PTE_FILE_SHIFT1))				\
-	  & ((1U << PTE_FILE_BITS1) - 1)))				\
-	+ ((((pte).pte_low >> (PTE_FILE_SHIFT2))			\
-	    & ((1U << PTE_FILE_BITS2) - 1))				\
-	   << (PTE_FILE_BITS1))						\
-	+ ((((pte).pte_low >> (PTE_FILE_SHIFT3))			\
-	    & ((1U << PTE_FILE_BITS3) - 1))				\
-	   << (PTE_FILE_BITS1 + PTE_FILE_BITS2))			\
-	+ ((((pte).pte_low >> (PTE_FILE_SHIFT4)))			\
-	    << (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3))
-
-#define pgoff_to_pte(off)						\
-	((pte_t) { .pte_low =						\
-	 ((((off)) & ((1U << PTE_FILE_BITS1) - 1)) << PTE_FILE_SHIFT1)	\
-	 + ((((off) >> PTE_FILE_BITS1)					\
-	     & ((1U << PTE_FILE_BITS2) - 1))				\
-	    << PTE_FILE_SHIFT2)						\
-	 + ((((off) >> (PTE_FILE_BITS1 + PTE_FILE_BITS2))		\
-	     & ((1U << PTE_FILE_BITS3) - 1))				\
-	    << PTE_FILE_SHIFT3)						\
-	 + ((((off) >>							\
-	      (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3)))	\
-	    << PTE_FILE_SHIFT4)						\
-	 + _PAGE_FILE })
+#define PTE_FILE_MASK1		((1U << PTE_FILE_BITS1) - 1)
+#define PTE_FILE_MASK2		((1U << PTE_FILE_BITS2) - 1)
+#define PTE_FILE_MASK3		((1U << PTE_FILE_BITS3) - 1)
+
+#define PTE_FILE_LSHIFT2	(PTE_FILE_BITS1)
+#define PTE_FILE_LSHIFT3	(PTE_FILE_BITS1 + PTE_FILE_BITS2)
+#define PTE_FILE_LSHIFT4	(PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3)
+
+static __always_inline pgoff_t pte_to_pgoff(pte_t pte)
+{
+	return (pgoff_t)
+		(pte_bitop(pte.pte_low, PTE_FILE_SHIFT1, PTE_FILE_MASK1,  0)		    +
+		 pte_bitop(pte.pte_low, PTE_FILE_SHIFT2, PTE_FILE_MASK2,  PTE_FILE_LSHIFT2) +
+		 pte_bitop(pte.pte_low, PTE_FILE_SHIFT3, PTE_FILE_MASK3,  PTE_FILE_LSHIFT3) +
+		 pte_bitop(pte.pte_low, PTE_FILE_SHIFT4,           -1UL,  PTE_FILE_LSHIFT4));
+}
+
+static __always_inline pte_t pgoff_to_pte(pgoff_t off)
+{
+	return (pte_t){
+		.pte_low =
+			pte_bitop(off,                0, PTE_FILE_MASK1,  PTE_FILE_SHIFT1) +
+			pte_bitop(off, PTE_FILE_LSHIFT2, PTE_FILE_MASK2,  PTE_FILE_SHIFT2) +
+			pte_bitop(off, PTE_FILE_LSHIFT3, PTE_FILE_MASK3,  PTE_FILE_SHIFT3) +
+			pte_bitop(off, PTE_FILE_LSHIFT4,           -1UL,  PTE_FILE_SHIFT4) +
+			_PAGE_FILE,
+	};
+}
 
 #else /* CONFIG_MEM_SOFT_DIRTY */
 
@@ -115,22 +125,30 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
 #define PTE_FILE_BITS1		(PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1)
 #define PTE_FILE_BITS2		(PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1)
 
-#define pte_to_pgoff(pte)						\
-	((((pte).pte_low >> PTE_FILE_SHIFT1)				\
-	  & ((1U << PTE_FILE_BITS1) - 1))				\
-	 + ((((pte).pte_low >> PTE_FILE_SHIFT2)				\
-	     & ((1U << PTE_FILE_BITS2) - 1)) << PTE_FILE_BITS1)		\
-	 + (((pte).pte_low >> PTE_FILE_SHIFT3)				\
-	    << (PTE_FILE_BITS1 + PTE_FILE_BITS2)))
-
-#define pgoff_to_pte(off)						\
-	((pte_t) { .pte_low =						\
-	 (((off) & ((1U << PTE_FILE_BITS1) - 1)) << PTE_FILE_SHIFT1)	\
-	 + ((((off) >> PTE_FILE_BITS1) & ((1U << PTE_FILE_BITS2) - 1))	\
-	    << PTE_FILE_SHIFT2)						\
-	 + (((off) >> (PTE_FILE_BITS1 + PTE_FILE_BITS2))		\
-	    << PTE_FILE_SHIFT3)						\
-	 + _PAGE_FILE })
+#define PTE_FILE_MASK1		((1U << PTE_FILE_BITS1) - 1)
+#define PTE_FILE_MASK2		((1U << PTE_FILE_BITS2) - 1)
+
+#define PTE_FILE_LSHIFT2	(PTE_FILE_BITS1)
+#define PTE_FILE_LSHIFT3	(PTE_FILE_BITS1 + PTE_FILE_BITS2)
+
+static __always_inline pgoff_t pte_to_pgoff(pte_t pte)
+{
+	return (pgoff_t)
+		(pte_bitop(pte.pte_low, PTE_FILE_SHIFT1, PTE_FILE_MASK1,  0)		    +
+		 pte_bitop(pte.pte_low, PTE_FILE_SHIFT2, PTE_FILE_MASK2,  PTE_FILE_LSHIFT2) +
+		 pte_bitop(pte.pte_low, PTE_FILE_SHIFT3,           -1UL,  PTE_FILE_LSHIFT3));
+}
+
+static __always_inline pte_t pgoff_to_pte(pgoff_t off)
+{
+	return (pte_t){
+		.pte_low =
+			pte_bitop(off,                0, PTE_FILE_MASK1,  PTE_FILE_SHIFT1) +
+			pte_bitop(off, PTE_FILE_LSHIFT2, PTE_FILE_MASK2,  PTE_FILE_SHIFT2) +
+			pte_bitop(off, PTE_FILE_LSHIFT3,           -1UL,  PTE_FILE_SHIFT3) +
+			_PAGE_FILE,
+	};
+}
 
 #endif /* CONFIG_MEM_SOFT_DIRTY */
 
-- 
cgit v1.2.3


From fd8526ad14c182605e42b64646344b95befd9f94 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Tue, 19 Nov 2013 15:17:50 +0200
Subject: x86/mm: Implement ASLR for hugetlb mappings

Matthew noticed that hugetlb mappings don't participate in ASLR on x86-64:

  %  for i in `seq 3`; do
  > tools/testing/selftests/vm/map_hugetlb | grep address
  > done
  Returned address is 0x2aaaaac00000
  Returned address is 0x2aaaaac00000
  Returned address is 0x2aaaaac00000

/proc/PID/maps entries for the mapping are always the same
(except inode number):

  2aaaaac00000-2aaabac00000 rw-p 00000000 00:0c 8200              /anon_hugepage (deleted)
  2aaaaac00000-2aaabac00000 rw-p 00000000 00:0c 256               /anon_hugepage (deleted)
  2aaaaac00000-2aaabac00000 rw-p 00000000 00:0c 7180              /anon_hugepage (deleted)

The reason is the generic hugetlb_get_unmapped_area() function
which is used on x86-64.  It doesn't support randomization and
use bottom-up unmapped area lookup, instead of usual top-down
on x86-64.

x86 has arch-specific hugetlb_get_unmapped_area(), but it's used
only on x86-32.

Let's use arch-specific hugetlb_get_unmapped_area() on x86-64
too. That adds ASLR and switches hugetlb mappings to use top-down
unmapped area lookup:

  %  for i in `seq 3`; do
  > tools/testing/selftests/vm/map_hugetlb | grep address
  > done
  Returned address is 0x7f4f08a00000
  Returned address is 0x7fdda4200000
  Returned address is 0x7febe0000000

/proc/PID/maps entries:

  7f4f08a00000-7f4f18a00000 rw-p 00000000 00:0c 1168              /anon_hugepage (deleted)
  7fdda4200000-7fddb4200000 rw-p 00000000 00:0c 7092              /anon_hugepage (deleted)
  7febe0000000-7febf0000000 rw-p 00000000 00:0c 7183              /anon_hugepage (deleted)

Unmapped area lookup policy for hugetlb mappings is consistent
with normal mappings now -- the only difference is alignment
requirements for huge pages.

libhugetlbfs test-suite didn't detect any regressions with the
patch applied (although it shows few failures on my machine
regardless the patch).

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Matthew Wilcox <willy@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mel Gorman <mgorman@suse.de>
Link: http://lkml.kernel.org/r/20131119131750.EA45CE0090@blue.fi.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/page.h    | 1 +
 arch/x86/include/asm/page_32.h | 4 ----
 arch/x86/mm/hugetlbpage.c      | 9 +++------
 3 files changed, 4 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index c87892442e53..775873d3be55 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -71,6 +71,7 @@ extern bool __virt_addr_valid(unsigned long kaddr);
 #include <asm-generic/getorder.h>
 
 #define __HAVE_ARCH_GATE_AREA 1
+#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
 
 #endif	/* __KERNEL__ */
 #endif /* _ASM_X86_PAGE_H */
diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h
index 4d550d04b609..904f528cc8e8 100644
--- a/arch/x86/include/asm/page_32.h
+++ b/arch/x86/include/asm/page_32.h
@@ -5,10 +5,6 @@
 
 #ifndef __ASSEMBLY__
 
-#ifdef CONFIG_HUGETLB_PAGE
-#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
-#endif
-
 #define __phys_addr_nodebug(x)	((x) - PAGE_OFFSET)
 #ifdef CONFIG_DEBUG_VIRTUAL
 extern unsigned long __phys_addr(unsigned long);
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 9d980d88b747..8c9f647ff9e1 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -87,9 +87,7 @@ int pmd_huge_support(void)
 }
 #endif
 
-/* x86_64 also uses this file */
-
-#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#ifdef CONFIG_HUGETLB_PAGE
 static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
 		unsigned long addr, unsigned long len,
 		unsigned long pgoff, unsigned long flags)
@@ -99,7 +97,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
 
 	info.flags = 0;
 	info.length = len;
-	info.low_limit = TASK_UNMAPPED_BASE;
+	info.low_limit = current->mm->mmap_legacy_base;
 	info.high_limit = TASK_SIZE;
 	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
 	info.align_offset = 0;
@@ -172,8 +170,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 		return hugetlb_get_unmapped_area_topdown(file, addr, len,
 				pgoff, flags);
 }
-
-#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/
+#endif /* CONFIG_HUGETLB_PAGE */
 
 #ifdef CONFIG_X86_64
 static __init int setup_hugepagesz(char *opt)
-- 
cgit v1.2.3


From 3d78e945b6249d4ef2308192343f8b203b1d7ea5 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Sat, 9 Nov 2013 18:44:19 +0100
Subject: uprobes/powerpc: Kill arch_uprobe->ainsn

powerpc has both arch_uprobe->insn and arch_uprobe->ainsn to
make the generic code happy. This is no longer needed after
the previous change, powerpc can just use "u32 insn".

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
---
 arch/powerpc/include/asm/uprobes.h | 5 ++---
 arch/powerpc/kernel/uprobes.c      | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h
index 75c6ecdb8f37..7422a999a39a 100644
--- a/arch/powerpc/include/asm/uprobes.h
+++ b/arch/powerpc/include/asm/uprobes.h
@@ -36,9 +36,8 @@ typedef ppc_opcode_t uprobe_opcode_t;
 
 struct arch_uprobe {
 	union {
-		u8	insn[MAX_UINSN_BYTES];
-		u8	ixol[MAX_UINSN_BYTES];
-		u32	ainsn;
+		u32	insn;
+		u32	ixol;
 	};
 };
 
diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
index 59f419b935f2..003b20964ea0 100644
--- a/arch/powerpc/kernel/uprobes.c
+++ b/arch/powerpc/kernel/uprobes.c
@@ -186,7 +186,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
 	 * emulate_step() returns 1 if the insn was successfully emulated.
 	 * For all other cases, we need to single-step in hardware.
 	 */
-	ret = emulate_step(regs, auprobe->ainsn);
+	ret = emulate_step(regs, auprobe->insn);
 	if (ret > 0)
 		return true;
 
-- 
cgit v1.2.3


From 661c80192d21269c7fc566f1d547510b0c867677 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Wed, 20 Nov 2013 12:50:51 -0800
Subject: x86-64, copy_user: Use leal to produce 32-bit results

When we are using lea to produce a 32-bit result, we can use the leal
form, rather than using leaq and worry about truncation elsewhere.

Make the leal explicit, both to be more obvious and since that is what
gcc generates and thus is less likely to trigger obscure gas bugs.

Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/1384634221-6006-1-git-send-email-fenghua.yu@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/lib/copy_user_64.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index ffe4eb9f09eb..dee945d55594 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -186,7 +186,7 @@ ENTRY(copy_user_generic_unrolled)
 30:	shll $6,%ecx
 	addl %ecx,%edx
 	jmp 60f
-40:	lea (%rdx,%rcx,8),%rdx
+40:	leal (%rdx,%rcx,8),%edx
 	jmp 60f
 50:	movl %ecx,%edx
 60:	jmp copy_user_handle_tail /* ecx is zerorest also */
@@ -252,7 +252,7 @@ ENTRY(copy_user_generic_string)
 	ret
 
 	.section .fixup,"ax"
-11:	lea (%rdx,%rcx,8),%rcx
+11:	leal (%rdx,%rcx,8),%ecx
 12:	movl %ecx,%edx		/* ecx is zerorest also */
 	jmp copy_user_handle_tail
 	.previous
-- 
cgit v1.2.3


From cdf8d75818e3cd30746d38f5fbcdb3745fd267a3 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Thu, 14 Nov 2013 10:14:37 +0000
Subject: metag: dma: remove dead code in dma_alloc_init()

Meta has 2 levels of page table so the pmd folds into the pud which
folds into the pgd. Therefore the !pmd check in dma_alloc_init() is dead
code since it essentially checks whether:
  (init_mm->pgd + 0x770) == 0

Remove the check.

Reported-by: Chen Gang <gang.chen@asianux.com>
Signed-off-by: James Hogan <james.hogan@imgtec.com>
---
 arch/metag/kernel/dma.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'arch')

diff --git a/arch/metag/kernel/dma.c b/arch/metag/kernel/dma.c
index db589ad5dbc4..c700d625067a 100644
--- a/arch/metag/kernel/dma.c
+++ b/arch/metag/kernel/dma.c
@@ -399,11 +399,6 @@ static int __init dma_alloc_init(void)
 		pgd = pgd_offset(&init_mm, CONSISTENT_START);
 		pud = pud_alloc(&init_mm, pgd, CONSISTENT_START);
 		pmd = pmd_alloc(&init_mm, pud, CONSISTENT_START);
-		if (!pmd) {
-			pr_err("%s: no pmd tables\n", __func__);
-			ret = -ENOMEM;
-			break;
-		}
 		WARN_ON(!pmd_none(*pmd));
 
 		pte = pte_alloc_kernel(pmd, CONSISTENT_START);
-- 
cgit v1.2.3


From a4df02a217e9787a4b967197d9d9030c3e3c1088 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Tue, 25 Jun 2013 21:15:24 +0200
Subject: m68k: Mark functions only called from setup_arch() __init

Some functions that are only called (indirectly) from setup_arch() lack
__init annotations.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/amiga/config.c    |  2 +-
 arch/m68k/apollo/config.c   | 10 +++++-----
 arch/m68k/bvme6000/config.c |  2 +-
 arch/m68k/emu/natfeat.c     |  3 ++-
 arch/m68k/kernel/setup_mm.c |  4 ++--
 arch/m68k/mac/psc.c         |  2 +-
 arch/m68k/mvme147/config.c  |  2 +-
 arch/m68k/mvme16x/config.c  |  2 +-
 arch/m68k/q40/config.c      |  4 ++--
 arch/m68k/sun3/dvma.c       |  6 ++----
 arch/m68k/sun3/mmu_emu.c    |  3 ++-
 arch/m68k/sun3/sun3dvma.c   |  3 ++-
 12 files changed, 22 insertions(+), 21 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c
index b819390e29cd..93ab423758da 100644
--- a/arch/m68k/amiga/config.c
+++ b/arch/m68k/amiga/config.c
@@ -140,7 +140,7 @@ static struct resource ram_resource[NUM_MEMINFO];
      *  Parse an Amiga-specific record in the bootinfo
      */
 
-int amiga_parse_bootinfo(const struct bi_record *record)
+int __init amiga_parse_bootinfo(const struct bi_record *record)
 {
 	int unknown = 0;
 	const unsigned long *data = record->data;
diff --git a/arch/m68k/apollo/config.c b/arch/m68k/apollo/config.c
index 3ea56b90e718..2bdde9685a29 100644
--- a/arch/m68k/apollo/config.c
+++ b/arch/m68k/apollo/config.c
@@ -1,3 +1,4 @@
+#include <linux/init.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
@@ -43,8 +44,8 @@ static const char *apollo_models[] = {
 	[APOLLO_DN4500-APOLLO_DN3000] = "DN4500 (Roadrunner)"
 };
 
-int apollo_parse_bootinfo(const struct bi_record *record) {
-
+int __init apollo_parse_bootinfo(const struct bi_record *record)
+{
 	int unknown = 0;
 	const unsigned long *data = record->data;
 
@@ -60,9 +61,8 @@ int apollo_parse_bootinfo(const struct bi_record *record) {
 	return unknown;
 }
 
-void dn_setup_model(void) {
-
-
+static void __init dn_setup_model(void)
+{
 	printk("Apollo hardware found: ");
 	printk("[%s]\n", apollo_models[apollo_model - APOLLO_DN3000]);
 
diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c
index 8943aa4c18e6..1b9e31e0130f 100644
--- a/arch/m68k/bvme6000/config.c
+++ b/arch/m68k/bvme6000/config.c
@@ -50,7 +50,7 @@ void bvme6000_set_vectors (void);
 static irq_handler_t tick_handler;
 
 
-int bvme6000_parse_bootinfo(const struct bi_record *bi)
+int __init bvme6000_parse_bootinfo(const struct bi_record *bi)
 {
 	if (bi->tag == BI_VME_TYPE)
 		return 0;
diff --git a/arch/m68k/emu/natfeat.c b/arch/m68k/emu/natfeat.c
index 121a6660ad4e..71b78ecee75c 100644
--- a/arch/m68k/emu/natfeat.c
+++ b/arch/m68k/emu/natfeat.c
@@ -9,6 +9,7 @@
  * the GNU General Public License (GPL), incorporated herein by reference.
  */
 
+#include <linux/init.h>
 #include <linux/types.h>
 #include <linux/console.h>
 #include <linux/string.h>
@@ -70,7 +71,7 @@ static void nf_poweroff(void)
 		nf_call(id);
 }
 
-void nf_init(void)
+void __init nf_init(void)
 {
 	unsigned long id, version;
 	char buf[256];
diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c
index e67e53159573..99aa9887d5a8 100644
--- a/arch/m68k/kernel/setup_mm.c
+++ b/arch/m68k/kernel/setup_mm.c
@@ -74,9 +74,9 @@ unsigned long m68k_memoffset;
 struct mem_info m68k_memory[NUM_MEMINFO];
 EXPORT_SYMBOL(m68k_memory);
 
-struct mem_info m68k_ramdisk;
+static struct mem_info m68k_ramdisk __initdata;
 
-static char m68k_command_line[CL_SIZE];
+static char m68k_command_line[CL_SIZE] __initdata;
 
 void (*mach_sched_init) (irq_handler_t handler) __initdata = NULL;
 /* machine dependent irq functions */
diff --git a/arch/m68k/mac/psc.c b/arch/m68k/mac/psc.c
index 6f026fc302fa..cc9f3f097be7 100644
--- a/arch/m68k/mac/psc.c
+++ b/arch/m68k/mac/psc.c
@@ -54,7 +54,7 @@ static void psc_debug_dump(void)
  * expanded to cover what I think are the other 7 channels.
  */
 
-static void psc_dma_die_die_die(void)
+static __init void psc_dma_die_die_die(void)
 {
 	int i;
 
diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c
index 1c6262803b94..5c99154c67c8 100644
--- a/arch/m68k/mvme147/config.c
+++ b/arch/m68k/mvme147/config.c
@@ -51,7 +51,7 @@ static int bcd2int (unsigned char b);
 irq_handler_t tick_handler;
 
 
-int mvme147_parse_bootinfo(const struct bi_record *bi)
+int __init mvme147_parse_bootinfo(const struct bi_record *bi)
 {
 	if (bi->tag == BI_VME_TYPE || bi->tag == BI_VME_BRDINFO)
 		return 0;
diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c
index 080a342458a1..60d8a1bc837d 100644
--- a/arch/m68k/mvme16x/config.c
+++ b/arch/m68k/mvme16x/config.c
@@ -60,7 +60,7 @@ unsigned short mvme16x_config;
 EXPORT_SYMBOL(mvme16x_config);
 
 
-int mvme16x_parse_bootinfo(const struct bi_record *bi)
+int __init mvme16x_parse_bootinfo(const struct bi_record *bi)
 {
 	if (bi->tag == BI_VME_TYPE || bi->tag == BI_VME_BRDINFO)
 		return 0;
diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c
index 078bb744b5fe..e90fe903613e 100644
--- a/arch/m68k/q40/config.c
+++ b/arch/m68k/q40/config.c
@@ -154,7 +154,7 @@ static unsigned int serports[] =
 	0x3f8,0x2f8,0x3e8,0x2e8,0
 };
 
-static void q40_disable_irqs(void)
+static void __init q40_disable_irqs(void)
 {
 	unsigned i, j;
 
@@ -198,7 +198,7 @@ void __init config_q40(void)
 }
 
 
-int q40_parse_bootinfo(const struct bi_record *rec)
+int __init q40_parse_bootinfo(const struct bi_record *rec)
 {
 	return 1;
 }
diff --git a/arch/m68k/sun3/dvma.c b/arch/m68k/sun3/dvma.c
index d522eaab4551..d95506e06c2a 100644
--- a/arch/m68k/sun3/dvma.c
+++ b/arch/m68k/sun3/dvma.c
@@ -7,6 +7,7 @@
  *
  */
 
+#include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/bootmem.h>
@@ -62,10 +63,7 @@ int dvma_map_iommu(unsigned long kaddr, unsigned long baddr,
 
 }
 
-void sun3_dvma_init(void)
+void __init sun3_dvma_init(void)
 {
-
 	memset(ptelist, 0, sizeof(ptelist));
-
-
 }
diff --git a/arch/m68k/sun3/mmu_emu.c b/arch/m68k/sun3/mmu_emu.c
index 8edc510a21be..3f258e230ba5 100644
--- a/arch/m68k/sun3/mmu_emu.c
+++ b/arch/m68k/sun3/mmu_emu.c
@@ -6,6 +6,7 @@
 ** Started 1/16/98 @ 2:22 am
 */
 
+#include <linux/init.h>
 #include <linux/mman.h>
 #include <linux/mm.h>
 #include <linux/kernel.h>
@@ -122,7 +123,7 @@ void print_pte_vaddr (unsigned long vaddr)
 /*
  * Initialise the MMU emulator.
  */
-void mmu_emu_init(unsigned long bootmem_end)
+void __init mmu_emu_init(unsigned long bootmem_end)
 {
 	unsigned long seg, num;
 	int i,j;
diff --git a/arch/m68k/sun3/sun3dvma.c b/arch/m68k/sun3/sun3dvma.c
index cab54482ca34..ca57966ec3a2 100644
--- a/arch/m68k/sun3/sun3dvma.c
+++ b/arch/m68k/sun3/sun3dvma.c
@@ -6,6 +6,7 @@
  * Contains common routines for sun3/sun3x DVMA management.
  */
 
+#include <linux/init.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/gfp.h>
@@ -245,7 +246,7 @@ static inline int free_baddr(unsigned long baddr)
 
 }
 
-void dvma_init(void)
+void __init dvma_init(void)
 {
 
 	struct hole *hole;
-- 
cgit v1.2.3


From 7b3e8de9b3e9c0ffd5cb1600e6075bede4272868 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Tue, 25 Jun 2013 20:33:44 +0200
Subject: m68k/sun3: Dynamically allocate the table to track IOMMU use

As Sun 3 kernels cannot be multi-platform due to the different Sun 3 MMU
type, it made sense to statically allocate the table to track IOMMU use.

However, Sun 3x kernels can be multi-platform. Furthermore, Sun 3x uses
a larger table than Sun 3 (8192 bytes instead of 512 bytes).

Hence switch to dynamic allocation of this table using the bootmem
allocator to avoid wasting 8192 bytes when not running on a Sun 3x.
As this allocator returns zeroed memory, there's no need to explicitly
initialize the table to zeroes.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/sun3/sun3dvma.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/sun3/sun3dvma.c b/arch/m68k/sun3/sun3dvma.c
index ca57966ec3a2..b37521a5259d 100644
--- a/arch/m68k/sun3/sun3dvma.c
+++ b/arch/m68k/sun3/sun3dvma.c
@@ -6,6 +6,7 @@
  * Contains common routines for sun3/sun3x DVMA management.
  */
 
+#include <linux/bootmem.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
@@ -31,7 +32,7 @@ static inline void dvma_unmap_iommu(unsigned long a, int b)
 extern void sun3_dvma_init(void);
 #endif
 
-static unsigned long iommu_use[IOMMU_TOTAL_ENTRIES];
+static unsigned long *iommu_use;
 
 #define dvma_index(baddr) ((baddr - DVMA_START) >> DVMA_PAGE_SHIFT)
 
@@ -266,7 +267,7 @@ void __init dvma_init(void)
 
 	list_add(&(hole->list), &hole_list);
 
-	memset(iommu_use, 0, sizeof(iommu_use));
+	iommu_use = alloc_bootmem(IOMMU_TOTAL_ENTRIES * sizeof(unsigned long));
 
 	dvma_unmap_iommu(DVMA_START, DVMA_SIZE);
 
-- 
cgit v1.2.3


From f16b89bc3cdf11266b657f2f74f4db255e77b76c Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 26 Jun 2013 13:18:09 +0200
Subject: m68k/mac: Fix comment about iop_*_present flags setup timing

This is no longer done from iop_init()

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/mac/iop.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/mac/iop.c b/arch/m68k/mac/iop.c
index 7d8d46127ad9..ed0d9b0473ea 100644
--- a/arch/m68k/mac/iop.c
+++ b/arch/m68k/mac/iop.c
@@ -118,9 +118,9 @@
 
 /*#define DEBUG_IOP*/
 
-/* Set to non-zero if the IOPs are present. Set by iop_init() */
+/* Non-zero if the IOPs are present */
 
-int iop_scc_present,iop_ism_present;
+int iop_scc_present, iop_ism_present;
 
 /* structure for tracking channel listeners */
 
-- 
cgit v1.2.3


From 51b9310f0a33d5280a93228475fa95e38fbcba36 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Tue, 17 Sep 2013 09:04:34 +0200
Subject: m68k/defconfig: Make NFS_V4 modular instead of builtin

This reduces the kernel image size by ca. 100 KiB, while still allowing
NFS root.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/configs/amiga_defconfig    | 2 +-
 arch/m68k/configs/apollo_defconfig   | 2 +-
 arch/m68k/configs/atari_defconfig    | 2 +-
 arch/m68k/configs/bvme6000_defconfig | 2 +-
 arch/m68k/configs/hp300_defconfig    | 2 +-
 arch/m68k/configs/mac_defconfig      | 2 +-
 arch/m68k/configs/multi_defconfig    | 2 +-
 arch/m68k/configs/mvme147_defconfig  | 2 +-
 arch/m68k/configs/mvme16x_defconfig  | 2 +-
 arch/m68k/configs/q40_defconfig      | 2 +-
 arch/m68k/configs/sun3_defconfig     | 2 +-
 arch/m68k/configs/sun3x_defconfig    | 2 +-
 12 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 19325e117eea..04432b7ce157 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -385,7 +385,7 @@ CONFIG_QNX6FS_FS=m
 CONFIG_SYSV_FS=m
 CONFIG_UFS_FS=m
 CONFIG_NFS_FS=y
-CONFIG_NFS_V4=y
+CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 14dc6ccda7f4..a018fd7ed7bc 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -342,7 +342,7 @@ CONFIG_QNX6FS_FS=m
 CONFIG_SYSV_FS=m
 CONFIG_UFS_FS=m
 CONFIG_NFS_FS=y
-CONFIG_NFS_V4=y
+CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 6d5370c914b2..fa33639b4107 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -360,7 +360,7 @@ CONFIG_QNX6FS_FS=m
 CONFIG_SYSV_FS=m
 CONFIG_UFS_FS=m
 CONFIG_NFS_FS=y
-CONFIG_NFS_V4=y
+CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index c015ddb6fd80..ca310d586689 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -334,7 +334,7 @@ CONFIG_QNX6FS_FS=m
 CONFIG_SYSV_FS=m
 CONFIG_UFS_FS=m
 CONFIG_NFS_FS=y
-CONFIG_NFS_V4=y
+CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index ec7382d8afff..aa98958c230e 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -344,7 +344,7 @@ CONFIG_QNX6FS_FS=m
 CONFIG_SYSV_FS=m
 CONFIG_UFS_FS=m
 CONFIG_NFS_FS=y
-CONFIG_NFS_V4=y
+CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 7d46fbec7042..ac2c153f8a4c 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -367,7 +367,7 @@ CONFIG_QNX6FS_FS=m
 CONFIG_SYSV_FS=m
 CONFIG_UFS_FS=m
 CONFIG_NFS_FS=y
-CONFIG_NFS_V4=y
+CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index b17a8837f0e1..61cdb096ca43 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -445,7 +445,7 @@ CONFIG_QNX6FS_FS=m
 CONFIG_SYSV_FS=m
 CONFIG_UFS_FS=m
 CONFIG_NFS_FS=y
-CONFIG_NFS_V4=y
+CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 5586c6529fce..f89613f9c82b 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -334,7 +334,7 @@ CONFIG_QNX6FS_FS=m
 CONFIG_SYSV_FS=m
 CONFIG_UFS_FS=m
 CONFIG_NFS_FS=y
-CONFIG_NFS_V4=y
+CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index e5e8262bbacd..d086acd6aa26 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -334,7 +334,7 @@ CONFIG_QNX6FS_FS=m
 CONFIG_SYSV_FS=m
 CONFIG_UFS_FS=m
 CONFIG_NFS_FS=y
-CONFIG_NFS_V4=y
+CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index be1496ed9b66..3662899c0f88 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -358,7 +358,7 @@ CONFIG_QNX6FS_FS=m
 CONFIG_SYSV_FS=m
 CONFIG_UFS_FS=m
 CONFIG_NFS_FS=y
-CONFIG_NFS_V4=y
+CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 54674d61e001..bb87f17ef477 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -336,7 +336,7 @@ CONFIG_QNX6FS_FS=m
 CONFIG_SYSV_FS=m
 CONFIG_UFS_FS=m
 CONFIG_NFS_FS=y
-CONFIG_NFS_V4=y
+CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index 832d9539f441..b75a0b177b24 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -336,7 +336,7 @@ CONFIG_QNX6FS_FS=m
 CONFIG_SYSV_FS=m
 CONFIG_UFS_FS=m
 CONFIG_NFS_FS=y
-CONFIG_NFS_V4=y
+CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
-- 
cgit v1.2.3


From 6074a13934202c7a68c77fd5f37da8dce731f181 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 26 Jun 2013 13:15:58 +0200
Subject: m68k/setup: Use pr_*() and __func__ instead of plain printk()

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/kernel/setup_mm.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c
index 99aa9887d5a8..9f16a15fa287 100644
--- a/arch/m68k/kernel/setup_mm.c
+++ b/arch/m68k/kernel/setup_mm.c
@@ -161,7 +161,8 @@ static void __init m68k_parse_bootinfo(const struct bi_record *record)
 				m68k_memory[m68k_num_memory].size = data[1];
 				m68k_num_memory++;
 			} else
-				printk("m68k_parse_bootinfo: too many memory chunks\n");
+				pr_warn("%s: too many memory chunks\n",
+					__func__);
 			break;
 
 		case BI_RAMDISK:
@@ -197,8 +198,8 @@ static void __init m68k_parse_bootinfo(const struct bi_record *record)
 				unknown = 1;
 		}
 		if (unknown)
-			printk("m68k_parse_bootinfo: unknown tag 0x%04x ignored\n",
-			       record->tag);
+			pr_warn("%s: unknown tag 0x%04x ignored\n", __func__,
+				   record->tag);
 		record = (struct bi_record *)((unsigned long)record +
 					      record->size);
 	}
@@ -206,8 +207,8 @@ static void __init m68k_parse_bootinfo(const struct bi_record *record)
 	m68k_realnum_memory = m68k_num_memory;
 #ifdef CONFIG_SINGLE_MEMORY_CHUNK
 	if (m68k_num_memory > 1) {
-		printk("Ignoring last %i chunks of physical memory\n",
-		       (m68k_num_memory - 1));
+		pr_warn("%s: ignoring last %i chunks of physical memory\n",
+			__func__, (m68k_num_memory - 1));
 		m68k_num_memory = 1;
 	}
 #endif
@@ -247,7 +248,7 @@ void __init setup_arch(char **cmdline_p)
 		asm (".chip 68060; movec %%pcr,%0; .chip 68k"
 		     : "=d" (pcr));
 		if (((pcr >> 8) & 0xff) <= 5) {
-			printk("Enabling workaround for errata I14\n");
+			pr_warn("Enabling workaround for errata I14\n");
 			asm (".chip 68060; movec %0,%%pcr; .chip 68k"
 			     : : "d" (pcr | 0x20));
 		}
@@ -353,7 +354,7 @@ void __init setup_arch(char **cmdline_p)
 				     BOOTMEM_DEFAULT);
 		initrd_start = (unsigned long)phys_to_virt(m68k_ramdisk.addr);
 		initrd_end = initrd_start + m68k_ramdisk.size;
-		printk("initrd: %08lx - %08lx\n", initrd_start, initrd_end);
+		pr_info("initrd: %08lx - %08lx\n", initrd_start, initrd_end);
 	}
 #endif
 
@@ -538,9 +539,9 @@ void check_bugs(void)
 {
 #ifndef CONFIG_M68KFPU_EMU
 	if (m68k_fputype == 0) {
-		printk(KERN_EMERG "*** YOU DO NOT HAVE A FLOATING POINT UNIT, "
+		pr_emerg("*** YOU DO NOT HAVE A FLOATING POINT UNIT, "
 			"WHICH IS REQUIRED BY LINUX/M68K ***\n");
-		printk(KERN_EMERG "Upgrade your hardware or join the FPU "
+		pr_emerg("Upgrade your hardware or join the FPU "
 			"emulation project\n");
 		panic("no FPU");
 	}
-- 
cgit v1.2.3


From c293738e6d8dfb9c941759855b5161fde449644d Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Sat, 29 Jun 2013 10:40:20 +0200
Subject: zorro: Do not allocate zorro_autocon[] statically

Currently the array of Zorro devices is allocated statically, wasting
up to 4.5 KiB when running an Amiga or multi-platform kernel on a machine
with no or a handful of Zorro expansion cards. Convert it to conditional
dynamic memory allocation to fix this.

amiga_parse_bootinfo() still needs to store some information about the
detected Zorro devices, at a time even the bootmem allocator is not yet
available.  This is now handled using a much smaller array (typically less
than 0.5 KiB), which is __initdata and thus freed later.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/amiga/config.c   |  6 +++---
 arch/m68k/amiga/platform.c |  4 ++--
 drivers/zorro/zorro.c      | 19 ++++++++++++++++---
 include/linux/zorro.h      | 18 +++++++++++++++++-
 4 files changed, 38 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c
index 93ab423758da..d956ddbfebe1 100644
--- a/arch/m68k/amiga/config.c
+++ b/arch/m68k/amiga/config.c
@@ -174,12 +174,12 @@ int __init amiga_parse_bootinfo(const struct bi_record *record)
 #ifdef CONFIG_ZORRO
 		if (zorro_num_autocon < ZORRO_NUM_AUTO) {
 			const struct ConfigDev *cd = (struct ConfigDev *)data;
-			struct zorro_dev *dev = &zorro_autocon[zorro_num_autocon++];
+			struct zorro_dev_init *dev = &zorro_autocon_init[zorro_num_autocon++];
 			dev->rom = cd->cd_Rom;
 			dev->slotaddr = cd->cd_SlotAddr;
 			dev->slotsize = cd->cd_SlotSize;
-			dev->resource.start = (unsigned long)cd->cd_BoardAddr;
-			dev->resource.end = dev->resource.start + cd->cd_BoardSize - 1;
+			dev->boardaddr = (u32)cd->cd_BoardAddr;
+			dev->boardsize = cd->cd_BoardSize;
 		} else
 			printk("amiga_parse_bootinfo: too many AutoConfig devices\n");
 #endif /* CONFIG_ZORRO */
diff --git a/arch/m68k/amiga/platform.c b/arch/m68k/amiga/platform.c
index dacd9f911f71..7847b2b1b5b6 100644
--- a/arch/m68k/amiga/platform.c
+++ b/arch/m68k/amiga/platform.c
@@ -67,8 +67,8 @@ static int __init z_dev_present(zorro_id id)
 	unsigned int i;
 
 	for (i = 0; i < zorro_num_autocon; i++)
-		if (zorro_autocon[i].rom.er_Manufacturer == ZORRO_MANUF(id) &&
-		    zorro_autocon[i].rom.er_Product == ZORRO_PROD(id))
+		if (zorro_autocon_init[i].rom.er_Manufacturer == ZORRO_MANUF(id) &&
+		    zorro_autocon_init[i].rom.er_Product == ZORRO_PROD(id))
 			return 1;
 
 	return 0;
diff --git a/drivers/zorro/zorro.c b/drivers/zorro/zorro.c
index 858c9714b2f3..10c7f77aec08 100644
--- a/drivers/zorro/zorro.c
+++ b/drivers/zorro/zorro.c
@@ -29,7 +29,8 @@
      */
 
 unsigned int zorro_num_autocon;
-struct zorro_dev zorro_autocon[ZORRO_NUM_AUTO];
+struct zorro_dev_init zorro_autocon_init[ZORRO_NUM_AUTO] __initdata;
+struct zorro_dev *zorro_autocon;
 
 
     /*
@@ -38,6 +39,7 @@ struct zorro_dev zorro_autocon[ZORRO_NUM_AUTO];
 
 struct zorro_bus {
 	struct device dev;
+	struct zorro_dev devices[0];
 };
 
 
@@ -125,16 +127,20 @@ static struct resource __init *zorro_find_parent_resource(
 static int __init amiga_zorro_probe(struct platform_device *pdev)
 {
 	struct zorro_bus *bus;
+	struct zorro_dev_init *zi;
 	struct zorro_dev *z;
 	struct resource *r;
 	unsigned int i;
 	int error;
 
 	/* Initialize the Zorro bus */
-	bus = kzalloc(sizeof(*bus), GFP_KERNEL);
+	bus = kzalloc(sizeof(*bus) +
+		      zorro_num_autocon * sizeof(bus->devices[0]),
+		      GFP_KERNEL);
 	if (!bus)
 		return -ENOMEM;
 
+	zorro_autocon = bus->devices;
 	bus->dev.parent = &pdev->dev;
 	dev_set_name(&bus->dev, "zorro");
 	error = device_register(&bus->dev);
@@ -151,15 +157,22 @@ static int __init amiga_zorro_probe(struct platform_device *pdev)
 
 	/* First identify all devices ... */
 	for (i = 0; i < zorro_num_autocon; i++) {
+		zi = &zorro_autocon_init[i];
 		z = &zorro_autocon[i];
+
+		z->rom = zi->rom;
 		z->id = (z->rom.er_Manufacturer<<16) | (z->rom.er_Product<<8);
 		if (z->id == ZORRO_PROD_GVP_EPC_BASE) {
 			/* GVP quirk */
-			unsigned long magic = zorro_resource_start(z)+0x8000;
+			unsigned long magic = zi->boardaddr + 0x8000;
 			z->id |= *(u16 *)ZTWO_VADDR(magic) & GVP_PRODMASK;
 		}
+		z->slotaddr = zi->slotaddr;
+		z->slotsize = zi->slotsize;
 		sprintf(z->name, "Zorro device %08x", z->id);
 		zorro_name_device(z);
+		z->resource.start = zi->boardaddr;
+		z->resource.end = zi->boardaddr + zi->boardsize - 1;
 		z->resource.name = z->name;
 		r = zorro_find_parent_resource(pdev, z);
 		error = request_resource(r, &z->resource);
diff --git a/include/linux/zorro.h b/include/linux/zorro.h
index dff42025649b..661cbd2a86ee 100644
--- a/include/linux/zorro.h
+++ b/include/linux/zorro.h
@@ -175,7 +175,23 @@ static inline struct zorro_driver *zorro_dev_driver(const struct zorro_dev *z)
 
 
 extern unsigned int zorro_num_autocon;	/* # of autoconfig devices found */
-extern struct zorro_dev zorro_autocon[ZORRO_NUM_AUTO];
+extern struct zorro_dev *zorro_autocon;
+
+
+    /*
+     * Minimal information about a Zorro device, passed from bootinfo
+     * Only available temporarily, i.e. until initmem has been freed!
+     */
+
+struct zorro_dev_init {
+	struct ExpansionRom rom;
+	u16 slotaddr;
+	u16 slotsize;
+	u32 boardaddr;
+	u32 boardsize;
+};
+
+extern struct zorro_dev_init zorro_autocon_init[ZORRO_NUM_AUTO] __initdata;
 
 
     /*
-- 
cgit v1.2.3


From 6112ea0862facaeaeab504ee01c0d04bcd22daaf Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Sun, 9 Jan 2011 11:03:43 +0100
Subject: zorro: ZTWO_VADDR() should return "void __iomem *"

ZTWO_VADDR() converts from physical to virtual I/O addresses, so it should
return "void __iomem *" instead of "unsigned long".

This allows to drop several casts, but requires adding a few casts to
accomodate legacy driver frameworks that store "unsigned long" I/O
addresses.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/amiga/chipram.c             | 2 +-
 arch/m68k/amiga/config.c              | 2 +-
 arch/m68k/include/asm/amigahw.h       | 2 +-
 drivers/block/z2ram.c                 | 4 ++--
 drivers/ide/buddha.c                  | 2 +-
 drivers/net/ethernet/8390/hydra.c     | 2 +-
 drivers/net/ethernet/8390/zorro8390.c | 4 ++--
 drivers/net/ethernet/amd/a2065.c      | 4 ++--
 drivers/net/ethernet/amd/ariadne.c    | 4 ++--
 drivers/parport/parport_mfc3.c        | 2 +-
 drivers/scsi/a2091.c                  | 2 +-
 drivers/scsi/a3000.c                  | 2 +-
 drivers/scsi/a4000t.c                 | 2 +-
 drivers/scsi/gvp11.c                  | 2 +-
 drivers/scsi/zorro7xx.c               | 2 +-
 drivers/video/amifb.c                 | 2 +-
 drivers/video/cirrusfb.c              | 4 ++--
 17 files changed, 22 insertions(+), 22 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/amiga/chipram.c b/arch/m68k/amiga/chipram.c
index 99449fbf9a72..ba03cec3f711 100644
--- a/arch/m68k/amiga/chipram.c
+++ b/arch/m68k/amiga/chipram.c
@@ -87,7 +87,7 @@ void *amiga_chip_alloc_res(unsigned long size, struct resource *res)
 
 	atomic_sub(size, &chipavail);
 	pr_debug("amiga_chip_alloc_res: returning %pR\n", res);
-	return (void *)ZTWO_VADDR(res->start);
+	return ZTWO_VADDR(res->start);
 }
 
 void amiga_chip_free(void *ptr)
diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c
index d956ddbfebe1..acd9c1640cfc 100644
--- a/arch/m68k/amiga/config.c
+++ b/arch/m68k/amiga/config.c
@@ -618,7 +618,7 @@ static int __init amiga_savekmsg_setup(char *arg)
 
 	/* Just steal the block, the chipram allocator isn't functional yet */
 	amiga_chip_size -= SAVEKMSG_MAXMEM;
-	savekmsg = (void *)ZTWO_VADDR(CHIP_PHYSADDR + amiga_chip_size);
+	savekmsg = ZTWO_VADDR(CHIP_PHYSADDR + amiga_chip_size);
 	savekmsg->magic1 = SAVEKMSG_MAGIC1;
 	savekmsg->magic2 = SAVEKMSG_MAGIC2;
 	savekmsg->magicptr = ZTWO_PADDR(savekmsg);
diff --git a/arch/m68k/include/asm/amigahw.h b/arch/m68k/include/asm/amigahw.h
index 7a19b5686a4a..3bef0b2ad945 100644
--- a/arch/m68k/include/asm/amigahw.h
+++ b/arch/m68k/include/asm/amigahw.h
@@ -266,7 +266,7 @@ struct CIA {
 
 #define zTwoBase (0x80000000)
 #define ZTWO_PADDR(x) (((unsigned long)(x))-zTwoBase)
-#define ZTWO_VADDR(x) (((unsigned long)(x))+zTwoBase)
+#define ZTWO_VADDR(x) ((void __iomem *)(((unsigned long)(x))+zTwoBase))
 
 #define CUSTOM_PHYSADDR     (0xdff000)
 #define amiga_custom ((*(volatile struct CUSTOM *)(zTwoBase+CUSTOM_PHYSADDR)))
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index 5a95baf4b104..8b2a60cee3a0 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -116,8 +116,8 @@ get_z2ram( void )
 	if ( test_bit( i, zorro_unused_z2ram ) )
 	{
 	    z2_count++;
-	    z2ram_map[ z2ram_size++ ] = 
-		ZTWO_VADDR( Z2RAM_START ) + ( i << Z2RAM_CHUNKSHIFT );
+	    z2ram_map[z2ram_size++] = (unsigned long)ZTWO_VADDR(Z2RAM_START) +
+				      (i << Z2RAM_CHUNKSHIFT);
 	    clear_bit( i, zorro_unused_z2ram );
 	}
     }
diff --git a/drivers/ide/buddha.c b/drivers/ide/buddha.c
index b1d38590ac01..46eaf58d881b 100644
--- a/drivers/ide/buddha.c
+++ b/drivers/ide/buddha.c
@@ -198,7 +198,7 @@ fail_base2:
 				continue;
 			}
 		}	  
-		buddha_board = ZTWO_VADDR(board);
+		buddha_board = (unsigned long)ZTWO_VADDR(board);
 		
 		/* write to BUDDHA_IRQ_MR to enable the board IRQ */
 		/* X-Surf doesn't have this.  IRQs are always on */
diff --git a/drivers/net/ethernet/8390/hydra.c b/drivers/net/ethernet/8390/hydra.c
index fb3dd4399cf3..f615fdec0f1b 100644
--- a/drivers/net/ethernet/8390/hydra.c
+++ b/drivers/net/ethernet/8390/hydra.c
@@ -113,7 +113,7 @@ static const struct net_device_ops hydra_netdev_ops = {
 static int hydra_init(struct zorro_dev *z)
 {
     struct net_device *dev;
-    unsigned long board = ZTWO_VADDR(z->resource.start);
+    unsigned long board = (unsigned long)ZTWO_VADDR(z->resource.start);
     unsigned long ioaddr = board+HYDRA_NIC_BASE;
     const char name[] = "NE2000";
     int start_page, stop_page;
diff --git a/drivers/net/ethernet/8390/zorro8390.c b/drivers/net/ethernet/8390/zorro8390.c
index 85ec4c2d2645..ae2a12b7db62 100644
--- a/drivers/net/ethernet/8390/zorro8390.c
+++ b/drivers/net/ethernet/8390/zorro8390.c
@@ -287,7 +287,7 @@ static const struct net_device_ops zorro8390_netdev_ops = {
 };
 
 static int zorro8390_init(struct net_device *dev, unsigned long board,
-			  const char *name, unsigned long ioaddr)
+			  const char *name, void __iomem *ioaddr)
 {
 	int i;
 	int err;
@@ -354,7 +354,7 @@ static int zorro8390_init(struct net_device *dev, unsigned long board,
 	start_page = NESM_START_PG;
 	stop_page = NESM_STOP_PG;
 
-	dev->base_addr = ioaddr;
+	dev->base_addr = (unsigned long)ioaddr;
 	dev->irq = IRQ_AMIGA_PORTS;
 
 	/* Install the Interrupt handler */
diff --git a/drivers/net/ethernet/amd/a2065.c b/drivers/net/ethernet/amd/a2065.c
index 0866e7627433..f492a19328e5 100644
--- a/drivers/net/ethernet/amd/a2065.c
+++ b/drivers/net/ethernet/amd/a2065.c
@@ -713,8 +713,8 @@ static int a2065_init_one(struct zorro_dev *z,
 	dev->dev_addr[3] = (z->rom.er_SerialNumber >> 16) & 0xff;
 	dev->dev_addr[4] = (z->rom.er_SerialNumber >> 8) & 0xff;
 	dev->dev_addr[5] = z->rom.er_SerialNumber & 0xff;
-	dev->base_addr = ZTWO_VADDR(base_addr);
-	dev->mem_start = ZTWO_VADDR(mem_start);
+	dev->base_addr = (unsigned long)ZTWO_VADDR(base_addr);
+	dev->mem_start = (unsigned long)ZTWO_VADDR(mem_start);
 	dev->mem_end = dev->mem_start + A2065_RAM_SIZE;
 
 	priv->ll = (volatile struct lance_regs *)dev->base_addr;
diff --git a/drivers/net/ethernet/amd/ariadne.c b/drivers/net/ethernet/amd/ariadne.c
index c178eb4c8166..33822cb69366 100644
--- a/drivers/net/ethernet/amd/ariadne.c
+++ b/drivers/net/ethernet/amd/ariadne.c
@@ -747,8 +747,8 @@ static int ariadne_init_one(struct zorro_dev *z,
 	dev->dev_addr[3] = (z->rom.er_SerialNumber >> 16) & 0xff;
 	dev->dev_addr[4] = (z->rom.er_SerialNumber >> 8) & 0xff;
 	dev->dev_addr[5] = z->rom.er_SerialNumber & 0xff;
-	dev->base_addr = ZTWO_VADDR(base_addr);
-	dev->mem_start = ZTWO_VADDR(mem_start);
+	dev->base_addr = (unsigned long)ZTWO_VADDR(base_addr);
+	dev->mem_start = (unsigned long)ZTWO_VADDR(mem_start);
 	dev->mem_end = dev->mem_start + ARIADNE_RAM_SIZE;
 
 	dev->netdev_ops = &ariadne_netdev_ops;
diff --git a/drivers/parport/parport_mfc3.c b/drivers/parport/parport_mfc3.c
index 7578d79b3688..2f650f68af14 100644
--- a/drivers/parport/parport_mfc3.c
+++ b/drivers/parport/parport_mfc3.c
@@ -300,7 +300,7 @@ static int __init parport_mfc3_init(void)
 		if (!request_mem_region(piabase, sizeof(struct pia), "PIA"))
 			continue;
 
-		pp = (struct pia *)ZTWO_VADDR(piabase);
+		pp = ZTWO_VADDR(piabase);
 		pp->crb = 0;
 		pp->pddrb = 255; /* all data pins output */
 		pp->crb = PIA_DDR|32|8;
diff --git a/drivers/scsi/a2091.c b/drivers/scsi/a2091.c
index 30fa38a0ad39..9176bfbd5745 100644
--- a/drivers/scsi/a2091.c
+++ b/drivers/scsi/a2091.c
@@ -201,7 +201,7 @@ static int a2091_probe(struct zorro_dev *z, const struct zorro_device_id *ent)
 	instance->irq = IRQ_AMIGA_PORTS;
 	instance->unique_id = z->slotaddr;
 
-	regs = (struct a2091_scsiregs *)ZTWO_VADDR(z->resource.start);
+	regs = ZTWO_VADDR(z->resource.start);
 	regs->DAWR = DAWR_A2091;
 
 	wdregs.SASR = &regs->SASR;
diff --git a/drivers/scsi/a3000.c b/drivers/scsi/a3000.c
index c0f4f4290dd6..dd5b64726ddc 100644
--- a/drivers/scsi/a3000.c
+++ b/drivers/scsi/a3000.c
@@ -220,7 +220,7 @@ static int __init amiga_a3000_scsi_probe(struct platform_device *pdev)
 
 	instance->irq = IRQ_AMIGA_PORTS;
 
-	regs = (struct a3000_scsiregs *)ZTWO_VADDR(res->start);
+	regs = ZTWO_VADDR(res->start);
 	regs->DAWR = DAWR_A3000;
 
 	wdregs.SASR = &regs->SASR;
diff --git a/drivers/scsi/a4000t.c b/drivers/scsi/a4000t.c
index 70c521f79f7c..f5a2ab41543b 100644
--- a/drivers/scsi/a4000t.c
+++ b/drivers/scsi/a4000t.c
@@ -56,7 +56,7 @@ static int __init amiga_a4000t_scsi_probe(struct platform_device *pdev)
 	scsi_addr = res->start + A4000T_SCSI_OFFSET;
 
 	/* Fill in the required pieces of hostdata */
-	hostdata->base = (void __iomem *)ZTWO_VADDR(scsi_addr);
+	hostdata->base = ZTWO_VADDR(scsi_addr);
 	hostdata->clock = 50;
 	hostdata->chip710 = 1;
 	hostdata->dmode_extra = DMODE_FC2;
diff --git a/drivers/scsi/gvp11.c b/drivers/scsi/gvp11.c
index 2203ac281103..3b6f83ffddc4 100644
--- a/drivers/scsi/gvp11.c
+++ b/drivers/scsi/gvp11.c
@@ -310,7 +310,7 @@ static int gvp11_probe(struct zorro_dev *z, const struct zorro_device_id *ent)
 	if (!request_mem_region(address, 256, "wd33c93"))
 		return -EBUSY;
 
-	regs = (struct gvp11_scsiregs *)(ZTWO_VADDR(address));
+	regs = ZTWO_VADDR(address);
 
 	error = check_wd33c93(regs);
 	if (error)
diff --git a/drivers/scsi/zorro7xx.c b/drivers/scsi/zorro7xx.c
index cbf3476c68cd..aff31991aea9 100644
--- a/drivers/scsi/zorro7xx.c
+++ b/drivers/scsi/zorro7xx.c
@@ -104,7 +104,7 @@ static int zorro7xx_init_one(struct zorro_dev *z,
 	if (ioaddr > 0x01000000)
 		hostdata->base = ioremap(ioaddr, zorro_resource_len(z));
 	else
-		hostdata->base = (void __iomem *)ZTWO_VADDR(ioaddr);
+		hostdata->base = ZTWO_VADDR(ioaddr);
 
 	hostdata->clock = 50;
 	hostdata->chip710 = 1;
diff --git a/drivers/video/amifb.c b/drivers/video/amifb.c
index 0dac36ce09d6..518f790ef88a 100644
--- a/drivers/video/amifb.c
+++ b/drivers/video/amifb.c
@@ -3710,7 +3710,7 @@ default_chipset:
 	if (!videomemory) {
 		dev_warn(&pdev->dev,
 			 "Unable to map videomem cached writethrough\n");
-		info->screen_base = (char *)ZTWO_VADDR(info->fix.smem_start);
+		info->screen_base = ZTWO_VADDR(info->fix.smem_start);
 	} else
 		info->screen_base = (char *)videomemory;
 
diff --git a/drivers/video/cirrusfb.c b/drivers/video/cirrusfb.c
index 5aab9b9dc210..d992aa5eb3f0 100644
--- a/drivers/video/cirrusfb.c
+++ b/drivers/video/cirrusfb.c
@@ -2256,7 +2256,7 @@ static int cirrusfb_zorro_register(struct zorro_dev *z,
 
 	info->fix.mmio_start = regbase;
 	cinfo->regbase = regbase > 16 * MB_ ? ioremap(regbase, 64 * 1024)
-					    : (caddr_t)ZTWO_VADDR(regbase);
+					    : ZTWO_VADDR(regbase);
 	if (!cinfo->regbase) {
 		dev_err(info->device, "Cannot map registers\n");
 		error = -EIO;
@@ -2266,7 +2266,7 @@ static int cirrusfb_zorro_register(struct zorro_dev *z,
 	info->fix.smem_start = rambase;
 	info->screen_size = ramsize;
 	info->screen_base = rambase > 16 * MB_ ? ioremap(rambase, ramsize)
-					       : (caddr_t)ZTWO_VADDR(rambase);
+					       : ZTWO_VADDR(rambase);
 	if (!info->screen_base) {
 		dev_err(info->device, "Cannot map video RAM\n");
 		error = -EIO;
-- 
cgit v1.2.3


From bd9ba8f40ee30edf31cc0845d8838bc43d172ef3 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Fri, 4 Oct 2013 09:38:53 +0200
Subject: zorro/UAPI: Use proper types (endianness/size) in <linux/zorro.h>

Fix member definitions for non-native userspace handling:
  - All multi-byte values are big-endian, hence use __be*,
  - All pointers are 32-bit pointers under AmigaOS, but unused (except for
    cd_BoardAddr) under Linux, hence use __be32.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/amiga/config.c           |  9 ++++----
 arch/m68k/amiga/platform.c         |  9 +++++---
 drivers/net/ethernet/amd/a2065.c   |  9 +++++---
 drivers/net/ethernet/amd/ariadne.c |  9 +++++---
 drivers/zorro/proc.c               | 10 +++++----
 drivers/zorro/zorro-sysfs.c        | 22 +++++++++++++-----
 drivers/zorro/zorro.c              |  4 +++-
 include/uapi/linux/zorro.h         | 46 +++++++++++++++++++-------------------
 8 files changed, 72 insertions(+), 46 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c
index acd9c1640cfc..65b5e937ebba 100644
--- a/arch/m68k/amiga/config.c
+++ b/arch/m68k/amiga/config.c
@@ -28,6 +28,7 @@
 #include <linux/keyboard.h>
 
 #include <asm/bootinfo.h>
+#include <asm/byteorder.h>
 #include <asm/setup.h>
 #include <asm/pgtable.h>
 #include <asm/amigahw.h>
@@ -176,10 +177,10 @@ int __init amiga_parse_bootinfo(const struct bi_record *record)
 			const struct ConfigDev *cd = (struct ConfigDev *)data;
 			struct zorro_dev_init *dev = &zorro_autocon_init[zorro_num_autocon++];
 			dev->rom = cd->cd_Rom;
-			dev->slotaddr = cd->cd_SlotAddr;
-			dev->slotsize = cd->cd_SlotSize;
-			dev->boardaddr = (u32)cd->cd_BoardAddr;
-			dev->boardsize = cd->cd_BoardSize;
+			dev->slotaddr = be16_to_cpu(cd->cd_SlotAddr);
+			dev->slotsize = be16_to_cpu(cd->cd_SlotSize);
+			dev->boardaddr = be32_to_cpu(cd->cd_BoardAddr);
+			dev->boardsize = be32_to_cpu(cd->cd_BoardSize);
 		} else
 			printk("amiga_parse_bootinfo: too many AutoConfig devices\n");
 #endif /* CONFIG_ZORRO */
diff --git a/arch/m68k/amiga/platform.c b/arch/m68k/amiga/platform.c
index 7847b2b1b5b6..d34029d7b058 100644
--- a/arch/m68k/amiga/platform.c
+++ b/arch/m68k/amiga/platform.c
@@ -13,6 +13,7 @@
 
 #include <asm/amigahw.h>
 #include <asm/amigayle.h>
+#include <asm/byteorder.h>
 
 
 #ifdef CONFIG_ZORRO
@@ -66,10 +67,12 @@ static int __init z_dev_present(zorro_id id)
 {
 	unsigned int i;
 
-	for (i = 0; i < zorro_num_autocon; i++)
-		if (zorro_autocon_init[i].rom.er_Manufacturer == ZORRO_MANUF(id) &&
-		    zorro_autocon_init[i].rom.er_Product == ZORRO_PROD(id))
+	for (i = 0; i < zorro_num_autocon; i++) {
+		const struct ExpansionRom *rom = &zorro_autocon_init[i].rom;
+		if (be16_to_cpu(rom->er_Manufacturer) == ZORRO_MANUF(id) &&
+		    rom->er_Product == ZORRO_PROD(id))
 			return 1;
+	}
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/amd/a2065.c b/drivers/net/ethernet/amd/a2065.c
index f492a19328e5..56139184b801 100644
--- a/drivers/net/ethernet/amd/a2065.c
+++ b/drivers/net/ethernet/amd/a2065.c
@@ -57,6 +57,7 @@
 #include <linux/zorro.h>
 #include <linux/bitops.h>
 
+#include <asm/byteorder.h>
 #include <asm/irq.h>
 #include <asm/amigaints.h>
 #include <asm/amigahw.h>
@@ -678,6 +679,7 @@ static int a2065_init_one(struct zorro_dev *z,
 	unsigned long base_addr = board + A2065_LANCE;
 	unsigned long mem_start = board + A2065_RAM;
 	struct resource *r1, *r2;
+	u32 serial;
 	int err;
 
 	r1 = request_mem_region(base_addr, sizeof(struct lance_regs),
@@ -702,6 +704,7 @@ static int a2065_init_one(struct zorro_dev *z,
 	r1->name = dev->name;
 	r2->name = dev->name;
 
+	serial = be32_to_cpu(z->rom.er_SerialNumber);
 	dev->dev_addr[0] = 0x00;
 	if (z->id != ZORRO_PROD_AMERISTAR_A2065) {	/* Commodore */
 		dev->dev_addr[1] = 0x80;
@@ -710,9 +713,9 @@ static int a2065_init_one(struct zorro_dev *z,
 		dev->dev_addr[1] = 0x00;
 		dev->dev_addr[2] = 0x9f;
 	}
-	dev->dev_addr[3] = (z->rom.er_SerialNumber >> 16) & 0xff;
-	dev->dev_addr[4] = (z->rom.er_SerialNumber >> 8) & 0xff;
-	dev->dev_addr[5] = z->rom.er_SerialNumber & 0xff;
+	dev->dev_addr[3] = (serial >> 16) & 0xff;
+	dev->dev_addr[4] = (serial >> 8) & 0xff;
+	dev->dev_addr[5] = serial & 0xff;
 	dev->base_addr = (unsigned long)ZTWO_VADDR(base_addr);
 	dev->mem_start = (unsigned long)ZTWO_VADDR(mem_start);
 	dev->mem_end = dev->mem_start + A2065_RAM_SIZE;
diff --git a/drivers/net/ethernet/amd/ariadne.c b/drivers/net/ethernet/amd/ariadne.c
index 33822cb69366..b08101b31b8b 100644
--- a/drivers/net/ethernet/amd/ariadne.c
+++ b/drivers/net/ethernet/amd/ariadne.c
@@ -51,6 +51,7 @@
 #include <linux/zorro.h>
 #include <linux/bitops.h>
 
+#include <asm/byteorder.h>
 #include <asm/amigaints.h>
 #include <asm/amigahw.h>
 #include <asm/irq.h>
@@ -718,6 +719,7 @@ static int ariadne_init_one(struct zorro_dev *z,
 	struct resource *r1, *r2;
 	struct net_device *dev;
 	struct ariadne_private *priv;
+	u32 serial;
 	int err;
 
 	r1 = request_mem_region(base_addr, sizeof(struct Am79C960), "Am79C960");
@@ -741,12 +743,13 @@ static int ariadne_init_one(struct zorro_dev *z,
 	r1->name = dev->name;
 	r2->name = dev->name;
 
+	serial = be32_to_cpu(z->rom.er_SerialNumber);
 	dev->dev_addr[0] = 0x00;
 	dev->dev_addr[1] = 0x60;
 	dev->dev_addr[2] = 0x30;
-	dev->dev_addr[3] = (z->rom.er_SerialNumber >> 16) & 0xff;
-	dev->dev_addr[4] = (z->rom.er_SerialNumber >> 8) & 0xff;
-	dev->dev_addr[5] = z->rom.er_SerialNumber & 0xff;
+	dev->dev_addr[3] = (serial >> 16) & 0xff;
+	dev->dev_addr[4] = (serial >> 8) & 0xff;
+	dev->dev_addr[5] = serial & 0xff;
 	dev->base_addr = (unsigned long)ZTWO_VADDR(base_addr);
 	dev->mem_start = (unsigned long)ZTWO_VADDR(mem_start);
 	dev->mem_end = dev->mem_start + ARIADNE_RAM_SIZE;
diff --git a/drivers/zorro/proc.c b/drivers/zorro/proc.c
index ea1ce822a8e0..6ac2579da0eb 100644
--- a/drivers/zorro/proc.c
+++ b/drivers/zorro/proc.c
@@ -14,6 +14,8 @@
 #include <linux/seq_file.h>
 #include <linux/init.h>
 #include <linux/export.h>
+
+#include <asm/byteorder.h>
 #include <asm/uaccess.h>
 #include <asm/amigahw.h>
 #include <asm/setup.h>
@@ -41,10 +43,10 @@ proc_bus_zorro_read(struct file *file, char __user *buf, size_t nbytes, loff_t *
 	/* Construct a ConfigDev */
 	memset(&cd, 0, sizeof(cd));
 	cd.cd_Rom = z->rom;
-	cd.cd_SlotAddr = z->slotaddr;
-	cd.cd_SlotSize = z->slotsize;
-	cd.cd_BoardAddr = (void *)zorro_resource_start(z);
-	cd.cd_BoardSize = zorro_resource_len(z);
+	cd.cd_SlotAddr = cpu_to_be16(z->slotaddr);
+	cd.cd_SlotSize = cpu_to_be16(z->slotsize);
+	cd.cd_BoardAddr = cpu_to_be32(zorro_resource_start(z));
+	cd.cd_BoardSize = cpu_to_be32(zorro_resource_len(z));
 
 	if (copy_to_user(buf, (void *)&cd + pos, nbytes))
 		return -EFAULT;
diff --git a/drivers/zorro/zorro-sysfs.c b/drivers/zorro/zorro-sysfs.c
index 26f7184ef9e1..36b210f9b6b2 100644
--- a/drivers/zorro/zorro-sysfs.c
+++ b/drivers/zorro/zorro-sysfs.c
@@ -16,6 +16,8 @@
 #include <linux/stat.h>
 #include <linux/string.h>
 
+#include <asm/byteorder.h>
+
 #include "zorro.h"
 
 
@@ -33,10 +35,20 @@ static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL);
 
 zorro_config_attr(id, id, "0x%08x\n");
 zorro_config_attr(type, rom.er_Type, "0x%02x\n");
-zorro_config_attr(serial, rom.er_SerialNumber, "0x%08x\n");
 zorro_config_attr(slotaddr, slotaddr, "0x%04x\n");
 zorro_config_attr(slotsize, slotsize, "0x%04x\n");
 
+static ssize_t
+show_serial(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct zorro_dev *z;
+
+	z = to_zorro_dev(dev);
+	return sprintf(buf, "0x%08x\n", be32_to_cpu(z->rom.er_SerialNumber));
+}
+
+static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
+
 static ssize_t zorro_show_resource(struct device *dev, struct device_attribute *attr, char *buf)
 {
 	struct zorro_dev *z = to_zorro_dev(dev);
@@ -60,10 +72,10 @@ static ssize_t zorro_read_config(struct file *filp, struct kobject *kobj,
 	/* Construct a ConfigDev */
 	memset(&cd, 0, sizeof(cd));
 	cd.cd_Rom = z->rom;
-	cd.cd_SlotAddr = z->slotaddr;
-	cd.cd_SlotSize = z->slotsize;
-	cd.cd_BoardAddr = (void *)zorro_resource_start(z);
-	cd.cd_BoardSize = zorro_resource_len(z);
+	cd.cd_SlotAddr = cpu_to_be16(z->slotaddr);
+	cd.cd_SlotSize = cpu_to_be16(z->slotsize);
+	cd.cd_BoardAddr = cpu_to_be32(zorro_resource_start(z));
+	cd.cd_BoardSize = cpu_to_be32(zorro_resource_len(z));
 
 	return memory_read_from_buffer(buf, count, &off, &cd, sizeof(cd));
 }
diff --git a/drivers/zorro/zorro.c b/drivers/zorro/zorro.c
index 450abf100f06..707c1a5a0317 100644
--- a/drivers/zorro/zorro.c
+++ b/drivers/zorro/zorro.c
@@ -18,6 +18,7 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
+#include <asm/byteorder.h>
 #include <asm/setup.h>
 #include <asm/amigahw.h>
 
@@ -161,7 +162,8 @@ static int __init amiga_zorro_probe(struct platform_device *pdev)
 		z = &zorro_autocon[i];
 
 		z->rom = zi->rom;
-		z->id = (z->rom.er_Manufacturer<<16) | (z->rom.er_Product<<8);
+		z->id = (be16_to_cpu(z->rom.er_Manufacturer) << 16) |
+			(z->rom.er_Product << 8);
 		if (z->id == ZORRO_PROD_GVP_EPC_BASE) {
 			/* GVP quirk */
 			unsigned long magic = zi->boardaddr + 0x8000;
diff --git a/include/uapi/linux/zorro.h b/include/uapi/linux/zorro.h
index 36a033e23d31..59d021b242ed 100644
--- a/include/uapi/linux/zorro.h
+++ b/include/uapi/linux/zorro.h
@@ -63,26 +63,26 @@ enum GVP_flags {
 
 
 struct Node {
-	struct  Node *ln_Succ;	/* Pointer to next (successor) */
-	struct  Node *ln_Pred;	/* Pointer to previous (predecessor) */
-	__u8    ln_Type;
-	__s8    ln_Pri;		/* Priority, for sorting */
-	__s8    *ln_Name;	/* ID string, null terminated */
+	__be32 ln_Succ;		/* Pointer to next (successor) */
+	__be32 ln_Pred;		/* Pointer to previous (predecessor) */
+	__u8   ln_Type;
+	__s8   ln_Pri;		/* Priority, for sorting */
+	__be32 ln_Name;		/* ID string, null terminated */
 } __packed;
 
 struct ExpansionRom {
 	/* -First 16 bytes of the expansion ROM */
-	__u8  er_Type;		/* Board type, size and flags */
-	__u8  er_Product;	/* Product number, assigned by manufacturer */
-	__u8  er_Flags;		/* Flags */
-	__u8  er_Reserved03;	/* Must be zero ($ff inverted) */
-	__u16 er_Manufacturer;	/* Unique ID, ASSIGNED BY COMMODORE-AMIGA! */
-	__u32 er_SerialNumber;	/* Available for use by manufacturer */
-	__u16 er_InitDiagVec;	/* Offset to optional "DiagArea" structure */
-	__u8  er_Reserved0c;
-	__u8  er_Reserved0d;
-	__u8  er_Reserved0e;
-	__u8  er_Reserved0f;
+	__u8   er_Type;		/* Board type, size and flags */
+	__u8   er_Product;	/* Product number, assigned by manufacturer */
+	__u8   er_Flags;		/* Flags */
+	__u8   er_Reserved03;	/* Must be zero ($ff inverted) */
+	__be16 er_Manufacturer;	/* Unique ID, ASSIGNED BY COMMODORE-AMIGA! */
+	__be32 er_SerialNumber;	/* Available for use by manufacturer */
+	__be16 er_InitDiagVec;	/* Offset to optional "DiagArea" structure */
+	__u8   er_Reserved0c;
+	__u8   er_Reserved0d;
+	__u8   er_Reserved0e;
+	__u8   er_Reserved0f;
 } __packed;
 
 /* er_Type board type bits */
@@ -99,13 +99,13 @@ struct ConfigDev {
 	__u8		cd_Flags;	/* (read/write) */
 	__u8		cd_Pad;		/* reserved */
 	struct ExpansionRom cd_Rom;	/* copy of board's expansion ROM */
-	void		*cd_BoardAddr;	/* where in memory the board was placed */
-	__u32		cd_BoardSize;	/* size of board in bytes */
-	__u16		cd_SlotAddr;	/* which slot number (PRIVATE) */
-	__u16		cd_SlotSize;	/* number of slots (PRIVATE) */
-	void		*cd_Driver;	/* pointer to node of driver */
-	struct ConfigDev *cd_NextCD;	/* linked list of drivers to config */
-	__u32		cd_Unused[4];	/* for whatever the driver wants */
+	__be32		cd_BoardAddr;	/* where in memory the board was placed */
+	__be32		cd_BoardSize;	/* size of board in bytes */
+	__be16		cd_SlotAddr;	/* which slot number (PRIVATE) */
+	__be16		cd_SlotSize;	/* number of slots (PRIVATE) */
+	__be32		cd_Driver;	/* pointer to node of driver */
+	__be32		cd_NextCD;	/* linked list of drivers to config */
+	__be32		cd_Unused[4];	/* for whatever the driver wants */
 } __packed;
 
 #define ZORRO_NUM_AUTO		16
-- 
cgit v1.2.3


From 29a202035753dd0e5810caaefe885ed8934bfd46 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Mon, 9 Sep 2013 09:00:50 +0200
Subject: m68k: The bootinfo is located right after the kernel

Since the introduction of init sections (which are located after BSS), the
bootinfo is no longer located right after the BSS, but after all kernel
sections.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/include/asm/bootinfo.h | 2 +-
 arch/m68k/kernel/head.S          | 2 +-
 arch/m68k/kernel/setup_mm.c      | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/include/asm/bootinfo.h b/arch/m68k/include/asm/bootinfo.h
index 67e7a78ad96b..fbb6150f00f3 100644
--- a/arch/m68k/include/asm/bootinfo.h
+++ b/arch/m68k/include/asm/bootinfo.h
@@ -34,7 +34,7 @@
      *  This way I hope to keep all future changes back/forewards compatible.
      *  Thus, keep your fingers crossed...
      *
-     *  This structure is copied right after the kernel bss by the bootstrap
+     *  This structure is copied right after the kernel by the bootstrap
      *  routine.
      */
 
diff --git a/arch/m68k/kernel/head.S b/arch/m68k/kernel/head.S
index ac85f16534af..fb348caa734e 100644
--- a/arch/m68k/kernel/head.S
+++ b/arch/m68k/kernel/head.S
@@ -1532,7 +1532,7 @@ L(cache_done):
 
 /*
  * Find a tag record in the bootinfo structure
- * The bootinfo structure is located right after the kernel bss
+ * The bootinfo structure is located right after the kernel
  * Returns: d0: size (-1 if not found)
  *          a0: data pointer (end-of-records if not found)
  */
diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c
index 9f16a15fa287..18423a843379 100644
--- a/arch/m68k/kernel/setup_mm.c
+++ b/arch/m68k/kernel/setup_mm.c
@@ -220,7 +220,7 @@ void __init setup_arch(char **cmdline_p)
 	int i;
 #endif
 
-	/* The bootinfo is located right after the kernel bss */
+	/* The bootinfo is located right after the kernel */
 	if (!CPU_IS_COLDFIRE)
 		m68k_parse_bootinfo((const struct bi_record *)_end);
 
-- 
cgit v1.2.3


From 7ca1e52dc816e2c1f6663d9d114f25f0b22f98f1 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Mon, 14 Oct 2013 11:45:07 +0200
Subject: m68k: head.S - Correct date and spelling

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/kernel/head.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/m68k/kernel/head.S b/arch/m68k/kernel/head.S
index fb348caa734e..8d14b0f9a36f 100644
--- a/arch/m68k/kernel/head.S
+++ b/arch/m68k/kernel/head.S
@@ -23,7 +23,7 @@
 ** 98/04/25 Phil Blundell: added HP300 support
 ** 1998/08/30 David Kilzer: Added support for font_desc structures
 **            for linux-2.1.115
-** 9/02/11  Richard Zidlicky: added Q40 support (initial vesion 99/01/01)
+** 1999/02/11  Richard Zidlicky: added Q40 support (initial version 99/01/01)
 ** 2004/05/13 Kars de Jong: Finalised HP300 support
 **
 ** This file is subject to the terms and conditions of the GNU General Public
-- 
cgit v1.2.3


From 958903d6f08c977778bf5e1f18eb7fd27b30ca72 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Tue, 1 Oct 2013 20:42:32 +0200
Subject: m68k: Drop remainings and API of BOOTINFO_COMPAT_1_0

Drop remainings and API for backwards compatibility with bootinfo interface
version 1.0.  This was used when booting a 2.1.x or newer kernel on Amiga,
Atari, or Mac using a bootstrap for kernel 2.0.x.

Everybody upgraded his bootstrap a long time ago, so this can go.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/include/asm/bootinfo.h | 112 ---------------------------------------
 arch/m68k/mac/config.c           |   1 -
 arch/m68k/mac/misc.c             |   1 -
 3 files changed, 114 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/include/asm/bootinfo.h b/arch/m68k/include/asm/bootinfo.h
index fbb6150f00f3..44f97beb5215 100644
--- a/arch/m68k/include/asm/bootinfo.h
+++ b/arch/m68k/include/asm/bootinfo.h
@@ -263,116 +263,4 @@ struct bootversion {
 #define Q40_BOOTI_VERSION      MK_BI_VERSION( 2, 0 )
 #define HP300_BOOTI_VERSION    MK_BI_VERSION( 2, 0 )
 
-#ifdef BOOTINFO_COMPAT_1_0
-
-    /*
-     *  Backwards compatibility with bootinfo interface version 1.0
-     */
-
-#define COMPAT_AMIGA_BOOTI_VERSION    MK_BI_VERSION( 1, 0 )
-#define COMPAT_ATARI_BOOTI_VERSION    MK_BI_VERSION( 1, 0 )
-#define COMPAT_MAC_BOOTI_VERSION      MK_BI_VERSION( 1, 0 )
-
-#include <linux/zorro.h>
-
-#define COMPAT_NUM_AUTO    16
-
-struct compat_bi_Amiga {
-    int model;
-    int num_autocon;
-    struct ConfigDev autocon[COMPAT_NUM_AUTO];
-    unsigned long chip_size;
-    unsigned char vblank;
-    unsigned char psfreq;
-    unsigned long eclock;
-    unsigned long chipset;
-    unsigned long hw_present;
-};
-
-struct compat_bi_Atari {
-    unsigned long hw_present;
-    unsigned long mch_cookie;
-};
-
-#ifndef __ASSEMBLY__
-
-struct compat_bi_Macintosh
-{
-	unsigned long videoaddr;
-	unsigned long videorow;
-	unsigned long videodepth;
-	unsigned long dimensions;
-	unsigned long args;
-	unsigned long boottime;
-	unsigned long gmtbias;
-	unsigned long bootver;
-	unsigned long videological;
-	unsigned long sccbase;
-	unsigned long id;
-	unsigned long memsize;
-	unsigned long serialmf;
-	unsigned long serialhsk;
-	unsigned long serialgpi;
-	unsigned long printmf;
-	unsigned long printhsk;
-	unsigned long printgpi;
-	unsigned long cpuid;
-	unsigned long rombase;
-	unsigned long adbdelay;
-	unsigned long timedbra;
-};
-
-#endif
-
-struct compat_mem_info {
-    unsigned long addr;
-    unsigned long size;
-};
-
-#define COMPAT_NUM_MEMINFO  4
-
-#define COMPAT_CPUB_68020 0
-#define COMPAT_CPUB_68030 1
-#define COMPAT_CPUB_68040 2
-#define COMPAT_CPUB_68060 3
-#define COMPAT_FPUB_68881 5
-#define COMPAT_FPUB_68882 6
-#define COMPAT_FPUB_68040 7
-#define COMPAT_FPUB_68060 8
-
-#define COMPAT_CPU_68020    (1<<COMPAT_CPUB_68020)
-#define COMPAT_CPU_68030    (1<<COMPAT_CPUB_68030)
-#define COMPAT_CPU_68040    (1<<COMPAT_CPUB_68040)
-#define COMPAT_CPU_68060    (1<<COMPAT_CPUB_68060)
-#define COMPAT_CPU_MASK     (31)
-#define COMPAT_FPU_68881    (1<<COMPAT_FPUB_68881)
-#define COMPAT_FPU_68882    (1<<COMPAT_FPUB_68882)
-#define COMPAT_FPU_68040    (1<<COMPAT_FPUB_68040)
-#define COMPAT_FPU_68060    (1<<COMPAT_FPUB_68060)
-#define COMPAT_FPU_MASK     (0xfe0)
-
-#define COMPAT_CL_SIZE      (256)
-
-struct compat_bootinfo {
-    unsigned long machtype;
-    unsigned long cputype;
-    struct compat_mem_info memory[COMPAT_NUM_MEMINFO];
-    int num_memory;
-    unsigned long ramdisk_size;
-    unsigned long ramdisk_addr;
-    char command_line[COMPAT_CL_SIZE];
-    union {
-	struct compat_bi_Amiga     bi_ami;
-	struct compat_bi_Atari     bi_ata;
-	struct compat_bi_Macintosh bi_mac;
-    } bi_un;
-};
-
-#define bi_amiga	bi_un.bi_ami
-#define bi_atari	bi_un.bi_ata
-#define bi_mac		bi_un.bi_mac
-
-#endif /* BOOTINFO_COMPAT_1_0 */
-
-
 #endif /* _M68K_BOOTINFO_H */
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index afb95d5fb26b..677244e0371c 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -26,7 +26,6 @@
 #include <linux/adb.h>
 #include <linux/cuda.h>
 
-#define BOOTINFO_COMPAT_1_0
 #include <asm/setup.h>
 #include <asm/bootinfo.h>
 
diff --git a/arch/m68k/mac/misc.c b/arch/m68k/mac/misc.c
index 5e085554ac7f..3d02a3c3a93a 100644
--- a/arch/m68k/mac/misc.c
+++ b/arch/m68k/mac/misc.c
@@ -25,7 +25,6 @@
 #include <asm/mac_via.h>
 #include <asm/mac_oss.h>
 
-#define BOOTINFO_COMPAT_1_0
 #include <asm/bootinfo.h>
 #include <asm/machdep.h>
 
-- 
cgit v1.2.3


From bdd47c9fc1d96d3319e938931c83b863b5331b57 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 2 Oct 2013 10:01:43 +0200
Subject: m68k/mac: Move struct mac_booter_data to <asm/macintosh.h>

struct mac_booter_data is no longer part of the bootinfo API, hence move it
from <asm/bootinfo.h> to <asm/macintosh.h>, dropping all unused fields in
the process.

Also remove the no longer used mac_booter_data pointer from head.S.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/include/asm/bootinfo.h  | 39 ---------------------------------------
 arch/m68k/include/asm/macintosh.h | 23 +++++++++++++++++++++++
 arch/m68k/kernel/head.S           |  2 --
 3 files changed, 23 insertions(+), 41 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/include/asm/bootinfo.h b/arch/m68k/include/asm/bootinfo.h
index 44f97beb5215..908c42f91e6a 100644
--- a/arch/m68k/include/asm/bootinfo.h
+++ b/arch/m68k/include/asm/bootinfo.h
@@ -167,45 +167,6 @@ struct bi_record {
 #define BI_MAC_IOP_SWIM		0x8020	/* Mac SWIM floppy IOP */
 #define BI_MAC_IOP_ADB		0x8021	/* Mac ADB IOP */
 
-    /*
-     * Mac: compatibility with old booter data format (temporarily)
-     * Fields unused with the new bootinfo can be deleted now; instead of
-     * adding new fields the struct might be splitted into a hardware address
-     * part and a hardware type part
-     */
-
-#ifndef __ASSEMBLY__
-
-struct mac_booter_data
-{
-	unsigned long videoaddr;
-	unsigned long videorow;
-	unsigned long videodepth;
-	unsigned long dimensions;
-	unsigned long args;
-	unsigned long boottime;
-	unsigned long gmtbias;
-	unsigned long bootver;
-	unsigned long videological;
-	unsigned long sccbase;
-	unsigned long id;
-	unsigned long memsize;
-	unsigned long serialmf;
-	unsigned long serialhsk;
-	unsigned long serialgpi;
-	unsigned long printmf;
-	unsigned long printhsk;
-	unsigned long printgpi;
-	unsigned long cpuid;
-	unsigned long rombase;
-	unsigned long adbdelay;
-	unsigned long timedbra;
-};
-
-extern struct mac_booter_data
-	mac_bi_data;
-
-#endif
 
     /*
      *  Apollo-specific tags
diff --git a/arch/m68k/include/asm/macintosh.h b/arch/m68k/include/asm/macintosh.h
index 682a1a2ff55f..6a633eb64331 100644
--- a/arch/m68k/include/asm/macintosh.h
+++ b/arch/m68k/include/asm/macintosh.h
@@ -135,4 +135,27 @@ struct mac_model
 
 extern struct mac_model *macintosh_config;
 
+
+    /*
+     * Internal representation of the Mac hardware, filled in from bootinfo
+     */
+
+struct mac_booter_data
+{
+	unsigned long videoaddr;
+	unsigned long videorow;
+	unsigned long videodepth;
+	unsigned long dimensions;
+	unsigned long boottime;
+	unsigned long gmtbias;
+	unsigned long videological;
+	unsigned long sccbase;
+	unsigned long id;
+	unsigned long memsize;
+	unsigned long cpuid;
+	unsigned long rombase;
+};
+
+extern struct mac_booter_data mac_bi_data;
+
 #endif
diff --git a/arch/m68k/kernel/head.S b/arch/m68k/kernel/head.S
index 8d14b0f9a36f..28f817481f3b 100644
--- a/arch/m68k/kernel/head.S
+++ b/arch/m68k/kernel/head.S
@@ -3896,8 +3896,6 @@ BVME_SCC_DATA_A	= 0xffb0000f
 #endif
 
 #if defined(CONFIG_MAC)
-L(mac_booter_data):
-	.long	0
 L(mac_videobase):
 	.long	0
 L(mac_videodepth):
-- 
cgit v1.2.3


From 4edf07fd8f6520bea292384a3638088ccbfdcaa1 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 2 Oct 2013 22:28:08 +0200
Subject: m68k/vme: Remove unused mvme_bdid_ptr

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/include/asm/mvme16xhw.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/include/asm/mvme16xhw.h b/arch/m68k/include/asm/mvme16xhw.h
index 6117f56653d2..87fa84ad252e 100644
--- a/arch/m68k/include/asm/mvme16xhw.h
+++ b/arch/m68k/include/asm/mvme16xhw.h
@@ -7,8 +7,6 @@
 
 /* Note, bytes 12 and 13 are board no in BCD (0162,0166,0167,0177,etc) */
 
-extern long mvme_bdid_ptr;
-
 typedef struct {
 	char	bdid[4];
 	u_char	rev, mth, day, yr;
-- 
cgit v1.2.3


From 4c3c522bcebe16a717d7a809fd14b11823794027 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 2 Oct 2013 11:37:33 +0200
Subject: m68k/UAPI: Disintegrate arch/m68k/include/asm/bootinfo.h

Export the bootinfo definitions that are used by bootstrap loaders, and
split them up in generic and platform-specific parts.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/amiga/config.c                     |   1 +
 arch/m68k/apollo/config.c                    |   1 +
 arch/m68k/atari/config.c                     |   1 +
 arch/m68k/bvme6000/config.c                  |   1 +
 arch/m68k/hp300/config.c                     |   1 +
 arch/m68k/include/asm/atarihw.h              |   2 +-
 arch/m68k/include/asm/bootinfo.h             | 213 +--------------------------
 arch/m68k/include/uapi/asm/Kbuild            |   8 +
 arch/m68k/include/uapi/asm/bootinfo-amiga.h  |  31 ++++
 arch/m68k/include/uapi/asm/bootinfo-apollo.h |  16 ++
 arch/m68k/include/uapi/asm/bootinfo-atari.h  |  44 ++++++
 arch/m68k/include/uapi/asm/bootinfo-hp300.h  |  25 ++++
 arch/m68k/include/uapi/asm/bootinfo-mac.h    |  59 ++++++++
 arch/m68k/include/uapi/asm/bootinfo-q40.h    |  16 ++
 arch/m68k/include/uapi/asm/bootinfo-vme.h    |  46 ++++++
 arch/m68k/include/uapi/asm/bootinfo.h        |  95 ++++++++++++
 arch/m68k/kernel/head.S                      |   6 +
 arch/m68k/mac/config.c                       |   1 +
 arch/m68k/mvme147/config.c                   |   1 +
 arch/m68k/mvme16x/config.c                   |   1 +
 20 files changed, 356 insertions(+), 213 deletions(-)
 create mode 100644 arch/m68k/include/uapi/asm/bootinfo-amiga.h
 create mode 100644 arch/m68k/include/uapi/asm/bootinfo-apollo.h
 create mode 100644 arch/m68k/include/uapi/asm/bootinfo-atari.h
 create mode 100644 arch/m68k/include/uapi/asm/bootinfo-hp300.h
 create mode 100644 arch/m68k/include/uapi/asm/bootinfo-mac.h
 create mode 100644 arch/m68k/include/uapi/asm/bootinfo-q40.h
 create mode 100644 arch/m68k/include/uapi/asm/bootinfo-vme.h
 create mode 100644 arch/m68k/include/uapi/asm/bootinfo.h

(limited to 'arch')

diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c
index 65b5e937ebba..0c92c1baf9bf 100644
--- a/arch/m68k/amiga/config.c
+++ b/arch/m68k/amiga/config.c
@@ -28,6 +28,7 @@
 #include <linux/keyboard.h>
 
 #include <asm/bootinfo.h>
+#include <asm/bootinfo-amiga.h>
 #include <asm/byteorder.h>
 #include <asm/setup.h>
 #include <asm/pgtable.h>
diff --git a/arch/m68k/apollo/config.c b/arch/m68k/apollo/config.c
index 2bdde9685a29..c90c19e904bc 100644
--- a/arch/m68k/apollo/config.c
+++ b/arch/m68k/apollo/config.c
@@ -10,6 +10,7 @@
 
 #include <asm/setup.h>
 #include <asm/bootinfo.h>
+#include <asm/bootinfo-apollo.h>
 #include <asm/pgtable.h>
 #include <asm/apollohw.h>
 #include <asm/irq.h>
diff --git a/arch/m68k/atari/config.c b/arch/m68k/atari/config.c
index fb2d0bd9b3ad..9159195505d5 100644
--- a/arch/m68k/atari/config.c
+++ b/arch/m68k/atari/config.c
@@ -37,6 +37,7 @@
 #include <linux/module.h>
 
 #include <asm/bootinfo.h>
+#include <asm/bootinfo-atari.h>
 #include <asm/setup.h>
 #include <asm/atarihw.h>
 #include <asm/atariints.h>
diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c
index 1b9e31e0130f..4d1b403822fa 100644
--- a/arch/m68k/bvme6000/config.c
+++ b/arch/m68k/bvme6000/config.c
@@ -28,6 +28,7 @@
 #include <linux/bcd.h>
 
 #include <asm/bootinfo.h>
+#include <asm/bootinfo-vme.h>
 #include <asm/pgtable.h>
 #include <asm/setup.h>
 #include <asm/irq.h>
diff --git a/arch/m68k/hp300/config.c b/arch/m68k/hp300/config.c
index b7609f791522..892d0794fc0b 100644
--- a/arch/m68k/hp300/config.c
+++ b/arch/m68k/hp300/config.c
@@ -14,6 +14,7 @@
 #include <linux/console.h>
 
 #include <asm/bootinfo.h>
+#include <asm/bootinfo-hp300.h>
 #include <asm/machdep.h>
 #include <asm/blinken.h>
 #include <asm/io.h>                               /* readb() and writeb() */
diff --git a/arch/m68k/include/asm/atarihw.h b/arch/m68k/include/asm/atarihw.h
index d887050e6da6..972c8f33f055 100644
--- a/arch/m68k/include/asm/atarihw.h
+++ b/arch/m68k/include/asm/atarihw.h
@@ -21,7 +21,7 @@
 #define _LINUX_ATARIHW_H_
 
 #include <linux/types.h>
-#include <asm/bootinfo.h>
+#include <asm/bootinfo-atari.h>
 #include <asm/raw_io.h>
 
 extern u_long atari_mch_cookie;
diff --git a/arch/m68k/include/asm/bootinfo.h b/arch/m68k/include/asm/bootinfo.h
index 908c42f91e6a..9edc31893fb8 100644
--- a/arch/m68k/include/asm/bootinfo.h
+++ b/arch/m68k/include/asm/bootinfo.h
@@ -6,222 +6,11 @@
 ** This file is subject to the terms and conditions of the GNU General Public
 ** License.  See the file COPYING in the main directory of this archive
 ** for more details.
-**
-** Created 09/29/92 by Greg Harp
-**
-** 5/2/94 Roman Hodek:
-**   Added bi_atari part of the machine dependent union bi_un; for now it
-**   contains just a model field to distinguish between TT and Falcon.
-** 26/7/96 Roman Zippel:
-**   Renamed to setup.h; added some useful macros to allow gcc some
-**   optimizations if possible.
-** 5/10/96 Geert Uytterhoeven:
-**   Redesign of the boot information structure; renamed to bootinfo.h again
-** 27/11/96 Geert Uytterhoeven:
-**   Backwards compatibility with bootinfo interface version 1.0
 */
 
 #ifndef _M68K_BOOTINFO_H
 #define _M68K_BOOTINFO_H
 
-
-    /*
-     *  Bootinfo definitions
-     *
-     *  This is an easily parsable and extendable structure containing all
-     *  information to be passed from the bootstrap to the kernel.
-     *
-     *  This way I hope to keep all future changes back/forewards compatible.
-     *  Thus, keep your fingers crossed...
-     *
-     *  This structure is copied right after the kernel by the bootstrap
-     *  routine.
-     */
-
-#ifndef __ASSEMBLY__
-
-struct bi_record {
-    unsigned short tag;			/* tag ID */
-    unsigned short size;		/* size of record (in bytes) */
-    unsigned long data[0];		/* data */
-};
-
-#endif /* __ASSEMBLY__ */
-
-
-    /*
-     *  Tag Definitions
-     *
-     *  Machine independent tags start counting from 0x0000
-     *  Machine dependent tags start counting from 0x8000
-     */
-
-#define BI_LAST			0x0000	/* last record (sentinel) */
-#define BI_MACHTYPE		0x0001	/* machine type (u_long) */
-#define BI_CPUTYPE		0x0002	/* cpu type (u_long) */
-#define BI_FPUTYPE		0x0003	/* fpu type (u_long) */
-#define BI_MMUTYPE		0x0004	/* mmu type (u_long) */
-#define BI_MEMCHUNK		0x0005	/* memory chunk address and size */
-					/* (struct mem_info) */
-#define BI_RAMDISK		0x0006	/* ramdisk address and size */
-					/* (struct mem_info) */
-#define BI_COMMAND_LINE		0x0007	/* kernel command line parameters */
-					/* (string) */
-
-    /*
-     *  Amiga-specific tags
-     */
-
-#define BI_AMIGA_MODEL		0x8000	/* model (u_long) */
-#define BI_AMIGA_AUTOCON	0x8001	/* AutoConfig device */
-					/* (struct ConfigDev) */
-#define BI_AMIGA_CHIP_SIZE	0x8002	/* size of Chip RAM (u_long) */
-#define BI_AMIGA_VBLANK		0x8003	/* VBLANK frequency (u_char) */
-#define BI_AMIGA_PSFREQ		0x8004	/* power supply frequency (u_char) */
-#define BI_AMIGA_ECLOCK		0x8005	/* EClock frequency (u_long) */
-#define BI_AMIGA_CHIPSET	0x8006	/* native chipset present (u_long) */
-#define BI_AMIGA_SERPER		0x8007	/* serial port period (u_short) */
-
-    /*
-     *  Atari-specific tags
-     */
-
-#define BI_ATARI_MCH_COOKIE	0x8000	/* _MCH cookie from TOS (u_long) */
-#define BI_ATARI_MCH_TYPE	0x8001	/* special machine type (u_long) */
-					/* (values are ATARI_MACH_* defines */
-
-/* mch_cookie values (upper word) */
-#define ATARI_MCH_ST		0
-#define ATARI_MCH_STE		1
-#define ATARI_MCH_TT		2
-#define ATARI_MCH_FALCON	3
-
-/* mch_type values */
-#define ATARI_MACH_NORMAL	0	/* no special machine type */
-#define ATARI_MACH_MEDUSA	1	/* Medusa 040 */
-#define ATARI_MACH_HADES	2	/* Hades 040 or 060 */
-#define ATARI_MACH_AB40		3	/* Afterburner040 on Falcon */
-
-    /*
-     *  VME-specific tags
-     */
-
-#define BI_VME_TYPE		0x8000	/* VME sub-architecture (u_long) */
-#define BI_VME_BRDINFO		0x8001	/* VME board information (struct) */
-
-/* BI_VME_TYPE codes */
-#define	VME_TYPE_TP34V		0x0034	/* Tadpole TP34V */
-#define VME_TYPE_MVME147	0x0147	/* Motorola MVME147 */
-#define VME_TYPE_MVME162	0x0162	/* Motorola MVME162 */
-#define VME_TYPE_MVME166	0x0166	/* Motorola MVME166 */
-#define VME_TYPE_MVME167	0x0167	/* Motorola MVME167 */
-#define VME_TYPE_MVME172	0x0172	/* Motorola MVME172 */
-#define VME_TYPE_MVME177	0x0177	/* Motorola MVME177 */
-#define VME_TYPE_BVME4000	0x4000	/* BVM Ltd. BVME4000 */
-#define VME_TYPE_BVME6000	0x6000	/* BVM Ltd. BVME6000 */
-
-/* BI_VME_BRDINFO is a 32 byte struct as returned by the Bug code on
- * Motorola VME boards.  Contains board number, Bug version, board
- * configuration options, etc.  See include/asm/mvme16xhw.h for details.
- */
-
-
-    /*
-     *  Macintosh-specific tags (all u_long)
-     */
-
-#define BI_MAC_MODEL		0x8000	/* Mac Gestalt ID (model type) */
-#define BI_MAC_VADDR		0x8001	/* Mac video base address */
-#define BI_MAC_VDEPTH		0x8002	/* Mac video depth */
-#define BI_MAC_VROW		0x8003	/* Mac video rowbytes */
-#define BI_MAC_VDIM		0x8004	/* Mac video dimensions */
-#define BI_MAC_VLOGICAL		0x8005	/* Mac video logical base */
-#define BI_MAC_SCCBASE		0x8006	/* Mac SCC base address */
-#define BI_MAC_BTIME		0x8007	/* Mac boot time */
-#define BI_MAC_GMTBIAS		0x8008	/* Mac GMT timezone offset */
-#define BI_MAC_MEMSIZE		0x8009	/* Mac RAM size (sanity check) */
-#define BI_MAC_CPUID		0x800a	/* Mac CPU type (sanity check) */
-#define BI_MAC_ROMBASE		0x800b	/* Mac system ROM base address */
-
-    /*
-     *  Macintosh hardware profile data - unused, see macintosh.h for
-     *  reasonable type values
-     */
-
-#define BI_MAC_VIA1BASE		0x8010	/* Mac VIA1 base address (always present) */
-#define BI_MAC_VIA2BASE		0x8011	/* Mac VIA2 base address (type varies) */
-#define BI_MAC_VIA2TYPE		0x8012	/* Mac VIA2 type (VIA, RBV, OSS) */
-#define BI_MAC_ADBTYPE		0x8013	/* Mac ADB interface type */
-#define BI_MAC_ASCBASE		0x8014	/* Mac Apple Sound Chip base address */
-#define BI_MAC_SCSI5380		0x8015	/* Mac NCR 5380 SCSI (base address, multi) */
-#define BI_MAC_SCSIDMA		0x8016	/* Mac SCSI DMA (base address) */
-#define BI_MAC_SCSI5396		0x8017	/* Mac NCR 53C96 SCSI (base address, multi) */
-#define BI_MAC_IDETYPE		0x8018	/* Mac IDE interface type */
-#define BI_MAC_IDEBASE		0x8019	/* Mac IDE interface base address */
-#define BI_MAC_NUBUS		0x801a	/* Mac Nubus type (none, regular, pseudo) */
-#define BI_MAC_SLOTMASK		0x801b	/* Mac Nubus slots present */
-#define BI_MAC_SCCTYPE		0x801c	/* Mac SCC serial type (normal, IOP) */
-#define BI_MAC_ETHTYPE		0x801d	/* Mac builtin ethernet type (Sonic, MACE */
-#define BI_MAC_ETHBASE		0x801e	/* Mac builtin ethernet base address */
-#define BI_MAC_PMU		0x801f	/* Mac power management / poweroff hardware */
-#define BI_MAC_IOP_SWIM		0x8020	/* Mac SWIM floppy IOP */
-#define BI_MAC_IOP_ADB		0x8021	/* Mac ADB IOP */
-
-
-    /*
-     *  Apollo-specific tags
-     */
-
-#define BI_APOLLO_MODEL         0x8000  /* model (u_long) */
-
-    /*
-     *  HP300-specific tags
-     */
-
-#define BI_HP300_MODEL		0x8000	/* model (u_long) */
-#define BI_HP300_UART_SCODE	0x8001	/* UART select code (u_long) */
-#define BI_HP300_UART_ADDR	0x8002	/* phys. addr of UART (u_long) */
-
-    /*
-     * Stuff for bootinfo interface versioning
-     *
-     * At the start of kernel code, a 'struct bootversion' is located.
-     * bootstrap checks for a matching version of the interface before booting
-     * a kernel, to avoid user confusion if kernel and bootstrap don't work
-     * together :-)
-     *
-     * If incompatible changes are made to the bootinfo interface, the major
-     * number below should be stepped (and the minor reset to 0) for the
-     * appropriate machine. If a change is backward-compatible, the minor
-     * should be stepped. "Backwards-compatible" means that booting will work,
-     * but certain features may not.
-     */
-
-#define BOOTINFOV_MAGIC			0x4249561A	/* 'BIV^Z' */
-#define MK_BI_VERSION(major,minor)	(((major)<<16)+(minor))
-#define BI_VERSION_MAJOR(v)		(((v) >> 16) & 0xffff)
-#define BI_VERSION_MINOR(v)		((v) & 0xffff)
-
-#ifndef __ASSEMBLY__
-
-struct bootversion {
-    unsigned short branch;
-    unsigned long magic;
-    struct {
-	unsigned long machtype;
-	unsigned long version;
-    } machversions[0];
-};
-
-#endif /* __ASSEMBLY__ */
-
-#define AMIGA_BOOTI_VERSION    MK_BI_VERSION( 2, 0 )
-#define ATARI_BOOTI_VERSION    MK_BI_VERSION( 2, 1 )
-#define MAC_BOOTI_VERSION      MK_BI_VERSION( 2, 0 )
-#define MVME147_BOOTI_VERSION  MK_BI_VERSION( 2, 0 )
-#define MVME16x_BOOTI_VERSION  MK_BI_VERSION( 2, 0 )
-#define BVME6000_BOOTI_VERSION MK_BI_VERSION( 2, 0 )
-#define Q40_BOOTI_VERSION      MK_BI_VERSION( 2, 0 )
-#define HP300_BOOTI_VERSION    MK_BI_VERSION( 2, 0 )
+#include <uapi/asm/bootinfo.h>
 
 #endif /* _M68K_BOOTINFO_H */
diff --git a/arch/m68k/include/uapi/asm/Kbuild b/arch/m68k/include/uapi/asm/Kbuild
index 1fef45ada097..6a2d257bdfb2 100644
--- a/arch/m68k/include/uapi/asm/Kbuild
+++ b/arch/m68k/include/uapi/asm/Kbuild
@@ -11,6 +11,14 @@ generic-y += termbits.h
 generic-y += termios.h
 
 header-y += a.out.h
+header-y += bootinfo.h
+header-y += bootinfo-amiga.h
+header-y += bootinfo-apollo.h
+header-y += bootinfo-atari.h
+header-y += bootinfo-hp300.h
+header-y += bootinfo-mac.h
+header-y += bootinfo-q40.h
+header-y += bootinfo-vme.h
 header-y += byteorder.h
 header-y += cachectl.h
 header-y += fcntl.h
diff --git a/arch/m68k/include/uapi/asm/bootinfo-amiga.h b/arch/m68k/include/uapi/asm/bootinfo-amiga.h
new file mode 100644
index 000000000000..2c32d671f232
--- /dev/null
+++ b/arch/m68k/include/uapi/asm/bootinfo-amiga.h
@@ -0,0 +1,31 @@
+/*
+** asm/bootinfo-amiga.h -- Amiga-specific boot information definitions
+*/
+
+#ifndef _UAPI_ASM_M68K_BOOTINFO_AMIGA_H
+#define _UAPI_ASM_M68K_BOOTINFO_AMIGA_H
+
+
+    /*
+     *  Amiga-specific tags
+     */
+
+#define BI_AMIGA_MODEL		0x8000	/* model (u_long) */
+#define BI_AMIGA_AUTOCON	0x8001	/* AutoConfig device */
+					/* (AmigaOS struct ConfigDev) */
+#define BI_AMIGA_CHIP_SIZE	0x8002	/* size of Chip RAM (u_long) */
+#define BI_AMIGA_VBLANK		0x8003	/* VBLANK frequency (u_char) */
+#define BI_AMIGA_PSFREQ		0x8004	/* power supply frequency (u_char) */
+#define BI_AMIGA_ECLOCK		0x8005	/* EClock frequency (u_long) */
+#define BI_AMIGA_CHIPSET	0x8006	/* native chipset present (u_long) */
+#define BI_AMIGA_SERPER		0x8007	/* serial port period (u_short) */
+
+
+    /*
+     *  Latest Amiga bootinfo version
+     */
+
+#define AMIGA_BOOTI_VERSION	MK_BI_VERSION(2, 0)
+
+
+#endif /* _UAPI_ASM_M68K_BOOTINFO_AMIGA_H */
diff --git a/arch/m68k/include/uapi/asm/bootinfo-apollo.h b/arch/m68k/include/uapi/asm/bootinfo-apollo.h
new file mode 100644
index 000000000000..3a2051e822b0
--- /dev/null
+++ b/arch/m68k/include/uapi/asm/bootinfo-apollo.h
@@ -0,0 +1,16 @@
+/*
+** asm/bootinfo-apollo.h -- Apollo-specific boot information definitions
+*/
+
+#ifndef _UAPI_ASM_M68K_BOOTINFO_APOLLO_H
+#define _UAPI_ASM_M68K_BOOTINFO_APOLLO_H
+
+
+    /*
+     *  Apollo-specific tags
+     */
+
+#define BI_APOLLO_MODEL		0x8000	/* model (u_long) */
+
+
+#endif /* _UAPI_ASM_M68K_BOOTINFO_APOLLO_H */
diff --git a/arch/m68k/include/uapi/asm/bootinfo-atari.h b/arch/m68k/include/uapi/asm/bootinfo-atari.h
new file mode 100644
index 000000000000..cca0a83fc0e5
--- /dev/null
+++ b/arch/m68k/include/uapi/asm/bootinfo-atari.h
@@ -0,0 +1,44 @@
+/*
+** asm/bootinfo-atari.h -- Atari-specific boot information definitions
+*/
+
+#ifndef _UAPI_ASM_M68K_BOOTINFO_ATARI_H
+#define _UAPI_ASM_M68K_BOOTINFO_ATARI_H
+
+
+    /*
+     *  Atari-specific tags
+     */
+
+#define BI_ATARI_MCH_COOKIE	0x8000	/* _MCH cookie from TOS (u_long) */
+#define BI_ATARI_MCH_TYPE	0x8001	/* special machine type (u_long) */
+
+
+    /*
+     *  mch_cookie values (upper word of BI_ATARI_MCH_COOKIE)
+     */
+
+#define ATARI_MCH_ST		0
+#define ATARI_MCH_STE		1
+#define ATARI_MCH_TT		2
+#define ATARI_MCH_FALCON	3
+
+
+    /*
+     *  Atari machine types (BI_ATARI_MCH_TYPE)
+     */
+
+#define ATARI_MACH_NORMAL	0	/* no special machine type */
+#define ATARI_MACH_MEDUSA	1	/* Medusa 040 */
+#define ATARI_MACH_HADES	2	/* Hades 040 or 060 */
+#define ATARI_MACH_AB40		3	/* Afterburner040 on Falcon */
+
+
+    /*
+     *  Latest Atari bootinfo version
+     */
+
+#define ATARI_BOOTI_VERSION	MK_BI_VERSION(2, 1)
+
+
+#endif /* _UAPI_ASM_M68K_BOOTINFO_ATARI_H */
diff --git a/arch/m68k/include/uapi/asm/bootinfo-hp300.h b/arch/m68k/include/uapi/asm/bootinfo-hp300.h
new file mode 100644
index 000000000000..e7ed19cdafe5
--- /dev/null
+++ b/arch/m68k/include/uapi/asm/bootinfo-hp300.h
@@ -0,0 +1,25 @@
+/*
+** asm/bootinfo-hp300.h -- HP9000/300-specific boot information definitions
+*/
+
+#ifndef _UAPI_ASM_M68K_BOOTINFO_HP300_H
+#define _UAPI_ASM_M68K_BOOTINFO_HP300_H
+
+
+    /*
+     *  HP9000/300-specific tags
+     */
+
+#define BI_HP300_MODEL		0x8000	/* model (u_long) */
+#define BI_HP300_UART_SCODE	0x8001	/* UART select code (u_long) */
+#define BI_HP300_UART_ADDR	0x8002	/* phys. addr of UART (u_long) */
+
+
+    /*
+     *  Latest HP9000/300 bootinfo version
+     */
+
+#define HP300_BOOTI_VERSION	MK_BI_VERSION(2, 0)
+
+
+#endif /* _UAPI_ASM_M68K_BOOTINFO_HP300_H */
diff --git a/arch/m68k/include/uapi/asm/bootinfo-mac.h b/arch/m68k/include/uapi/asm/bootinfo-mac.h
new file mode 100644
index 000000000000..d4ef2115c8f9
--- /dev/null
+++ b/arch/m68k/include/uapi/asm/bootinfo-mac.h
@@ -0,0 +1,59 @@
+/*
+** asm/bootinfo-mac.h -- Macintosh-specific boot information definitions
+*/
+
+#ifndef _UAPI_ASM_M68K_BOOTINFO_MAC_H
+#define _UAPI_ASM_M68K_BOOTINFO_MAC_H
+
+
+    /*
+     *  Macintosh-specific tags (all u_long)
+     */
+
+#define BI_MAC_MODEL		0x8000	/* Mac Gestalt ID (model type) */
+#define BI_MAC_VADDR		0x8001	/* Mac video base address */
+#define BI_MAC_VDEPTH		0x8002	/* Mac video depth */
+#define BI_MAC_VROW		0x8003	/* Mac video rowbytes */
+#define BI_MAC_VDIM		0x8004	/* Mac video dimensions */
+#define BI_MAC_VLOGICAL		0x8005	/* Mac video logical base */
+#define BI_MAC_SCCBASE		0x8006	/* Mac SCC base address */
+#define BI_MAC_BTIME		0x8007	/* Mac boot time */
+#define BI_MAC_GMTBIAS		0x8008	/* Mac GMT timezone offset */
+#define BI_MAC_MEMSIZE		0x8009	/* Mac RAM size (sanity check) */
+#define BI_MAC_CPUID		0x800a	/* Mac CPU type (sanity check) */
+#define BI_MAC_ROMBASE		0x800b	/* Mac system ROM base address */
+
+
+    /*
+     *  Macintosh hardware profile data - unused, see macintosh.h for
+     *  reasonable type values
+     */
+
+#define BI_MAC_VIA1BASE		0x8010	/* Mac VIA1 base address (always present) */
+#define BI_MAC_VIA2BASE		0x8011	/* Mac VIA2 base address (type varies) */
+#define BI_MAC_VIA2TYPE		0x8012	/* Mac VIA2 type (VIA, RBV, OSS) */
+#define BI_MAC_ADBTYPE		0x8013	/* Mac ADB interface type */
+#define BI_MAC_ASCBASE		0x8014	/* Mac Apple Sound Chip base address */
+#define BI_MAC_SCSI5380		0x8015	/* Mac NCR 5380 SCSI (base address, multi) */
+#define BI_MAC_SCSIDMA		0x8016	/* Mac SCSI DMA (base address) */
+#define BI_MAC_SCSI5396		0x8017	/* Mac NCR 53C96 SCSI (base address, multi) */
+#define BI_MAC_IDETYPE		0x8018	/* Mac IDE interface type */
+#define BI_MAC_IDEBASE		0x8019	/* Mac IDE interface base address */
+#define BI_MAC_NUBUS		0x801a	/* Mac Nubus type (none, regular, pseudo) */
+#define BI_MAC_SLOTMASK		0x801b	/* Mac Nubus slots present */
+#define BI_MAC_SCCTYPE		0x801c	/* Mac SCC serial type (normal, IOP) */
+#define BI_MAC_ETHTYPE		0x801d	/* Mac builtin ethernet type (Sonic, MACE */
+#define BI_MAC_ETHBASE		0x801e	/* Mac builtin ethernet base address */
+#define BI_MAC_PMU		0x801f	/* Mac power management / poweroff hardware */
+#define BI_MAC_IOP_SWIM		0x8020	/* Mac SWIM floppy IOP */
+#define BI_MAC_IOP_ADB		0x8021	/* Mac ADB IOP */
+
+
+    /*
+     *  Latest Macintosh bootinfo version
+     */
+
+#define MAC_BOOTI_VERSION	MK_BI_VERSION(2, 0)
+
+
+#endif /* _UAPI_ASM_M68K_BOOTINFO_MAC_H */
diff --git a/arch/m68k/include/uapi/asm/bootinfo-q40.h b/arch/m68k/include/uapi/asm/bootinfo-q40.h
new file mode 100644
index 000000000000..c79fea7e555b
--- /dev/null
+++ b/arch/m68k/include/uapi/asm/bootinfo-q40.h
@@ -0,0 +1,16 @@
+/*
+** asm/bootinfo-q40.h -- Q40-specific boot information definitions
+*/
+
+#ifndef _UAPI_ASM_M68K_BOOTINFO_Q40_H
+#define _UAPI_ASM_M68K_BOOTINFO_Q40_H
+
+
+    /*
+     *  Latest Q40 bootinfo version
+     */
+
+#define Q40_BOOTI_VERSION	MK_BI_VERSION(2, 0)
+
+
+#endif /* _UAPI_ASM_M68K_BOOTINFO_Q40_H */
diff --git a/arch/m68k/include/uapi/asm/bootinfo-vme.h b/arch/m68k/include/uapi/asm/bootinfo-vme.h
new file mode 100644
index 000000000000..4643a55ed768
--- /dev/null
+++ b/arch/m68k/include/uapi/asm/bootinfo-vme.h
@@ -0,0 +1,46 @@
+/*
+** asm/bootinfo-vme.h -- VME-specific boot information definitions
+*/
+
+#ifndef _UAPI_ASM_M68K_BOOTINFO_VME_H
+#define _UAPI_ASM_M68K_BOOTINFO_VME_H
+
+
+    /*
+     *  VME-specific tags
+     */
+
+#define BI_VME_TYPE		0x8000	/* VME sub-architecture (u_long) */
+#define BI_VME_BRDINFO		0x8001	/* VME board information (struct) */
+
+
+    /*
+     *  VME models (BI_VME_TYPE)
+     */
+
+#define VME_TYPE_TP34V		0x0034	/* Tadpole TP34V */
+#define VME_TYPE_MVME147	0x0147	/* Motorola MVME147 */
+#define VME_TYPE_MVME162	0x0162	/* Motorola MVME162 */
+#define VME_TYPE_MVME166	0x0166	/* Motorola MVME166 */
+#define VME_TYPE_MVME167	0x0167	/* Motorola MVME167 */
+#define VME_TYPE_MVME172	0x0172	/* Motorola MVME172 */
+#define VME_TYPE_MVME177	0x0177	/* Motorola MVME177 */
+#define VME_TYPE_BVME4000	0x4000	/* BVM Ltd. BVME4000 */
+#define VME_TYPE_BVME6000	0x6000	/* BVM Ltd. BVME6000 */
+
+/* BI_VME_BRDINFO is a 32 byte struct as returned by the Bug code on
+ * Motorola VME boards.  Contains board number, Bug version, board
+ * configuration options, etc.  See include/asm/mvme16xhw.h for details.
+ */
+
+
+    /*
+     *  Latest VME bootinfo versions
+     */
+
+#define MVME147_BOOTI_VERSION	MK_BI_VERSION(2, 0)
+#define MVME16x_BOOTI_VERSION	MK_BI_VERSION(2, 0)
+#define BVME6000_BOOTI_VERSION	MK_BI_VERSION(2, 0)
+
+
+#endif /* _UAPI_ASM_M68K_BOOTINFO_VME_H */
diff --git a/arch/m68k/include/uapi/asm/bootinfo.h b/arch/m68k/include/uapi/asm/bootinfo.h
new file mode 100644
index 000000000000..74a6fae2a5bf
--- /dev/null
+++ b/arch/m68k/include/uapi/asm/bootinfo.h
@@ -0,0 +1,95 @@
+/*
+ * asm/bootinfo.h -- Definition of the Linux/m68k boot information structure
+ *
+ * Copyright 1992 by Greg Harp
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive
+ * for more details.
+ */
+
+#ifndef _UAPI_ASM_M68K_BOOTINFO_H
+#define _UAPI_ASM_M68K_BOOTINFO_H
+
+#include <linux/types.h>
+
+
+#ifndef __ASSEMBLY__
+
+    /*
+     *  Bootinfo definitions
+     *
+     *  This is an easily parsable and extendable structure containing all
+     *  information to be passed from the bootstrap to the kernel.
+     *
+     *  This way I hope to keep all future changes back/forewards compatible.
+     *  Thus, keep your fingers crossed...
+     *
+     *  This structure is copied right after the kernel by the bootstrap
+     *  routine.
+     */
+
+struct bi_record {
+	unsigned short tag;		/* tag ID */
+	unsigned short size;		/* size of record (in bytes) */
+	unsigned long data[0];		/* data */
+};
+
+#endif /* __ASSEMBLY__ */
+
+
+    /*
+     *  Tag Definitions
+     *
+     *  Machine independent tags start counting from 0x0000
+     *  Machine dependent tags start counting from 0x8000
+     */
+
+#define BI_LAST			0x0000	/* last record (sentinel) */
+#define BI_MACHTYPE		0x0001	/* machine type (u_long) */
+#define BI_CPUTYPE		0x0002	/* cpu type (u_long) */
+#define BI_FPUTYPE		0x0003	/* fpu type (u_long) */
+#define BI_MMUTYPE		0x0004	/* mmu type (u_long) */
+#define BI_MEMCHUNK		0x0005	/* memory chunk address and size */
+					/* (struct mem_info) */
+#define BI_RAMDISK		0x0006	/* ramdisk address and size */
+					/* (struct mem_info) */
+#define BI_COMMAND_LINE		0x0007	/* kernel command line parameters */
+					/* (string) */
+
+
+    /*
+     * Stuff for bootinfo interface versioning
+     *
+     * At the start of kernel code, a 'struct bootversion' is located.
+     * bootstrap checks for a matching version of the interface before booting
+     * a kernel, to avoid user confusion if kernel and bootstrap don't work
+     * together :-)
+     *
+     * If incompatible changes are made to the bootinfo interface, the major
+     * number below should be stepped (and the minor reset to 0) for the
+     * appropriate machine. If a change is backward-compatible, the minor
+     * should be stepped. "Backwards-compatible" means that booting will work,
+     * but certain features may not.
+     */
+
+#define BOOTINFOV_MAGIC			0x4249561A	/* 'BIV^Z' */
+#define MK_BI_VERSION(major, minor)	(((major) << 16) + (minor))
+#define BI_VERSION_MAJOR(v)		(((v) >> 16) & 0xffff)
+#define BI_VERSION_MINOR(v)		((v) & 0xffff)
+
+#ifndef __ASSEMBLY__
+
+struct bootversion {
+	unsigned short branch;
+	unsigned long magic;
+	struct {
+		unsigned long machtype;
+		unsigned long version;
+	} machversions[0];
+} __packed;
+
+#endif /* __ASSEMBLY__ */
+
+
+#endif /* _UAPI_ASM_M68K_BOOTINFO_H */
diff --git a/arch/m68k/kernel/head.S b/arch/m68k/kernel/head.S
index 28f817481f3b..7d7913f5dce3 100644
--- a/arch/m68k/kernel/head.S
+++ b/arch/m68k/kernel/head.S
@@ -257,6 +257,12 @@
 #include <linux/linkage.h>
 #include <linux/init.h>
 #include <asm/bootinfo.h>
+#include <asm/bootinfo-amiga.h>
+#include <asm/bootinfo-atari.h>
+#include <asm/bootinfo-hp300.h>
+#include <asm/bootinfo-mac.h>
+#include <asm/bootinfo-q40.h>
+#include <asm/bootinfo-vme.h>
 #include <asm/setup.h>
 #include <asm/entry.h>
 #include <asm/pgtable.h>
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index 677244e0371c..e48069da04ed 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -28,6 +28,7 @@
 
 #include <asm/setup.h>
 #include <asm/bootinfo.h>
+#include <asm/bootinfo-mac.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c
index 5c99154c67c8..3a1d47564e52 100644
--- a/arch/m68k/mvme147/config.c
+++ b/arch/m68k/mvme147/config.c
@@ -26,6 +26,7 @@
 #include <linux/interrupt.h>
 
 #include <asm/bootinfo.h>
+#include <asm/bootinfo-vme.h>
 #include <asm/pgtable.h>
 #include <asm/setup.h>
 #include <asm/irq.h>
diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c
index 60d8a1bc837d..e05994fd048c 100644
--- a/arch/m68k/mvme16x/config.c
+++ b/arch/m68k/mvme16x/config.c
@@ -29,6 +29,7 @@
 #include <linux/module.h>
 
 #include <asm/bootinfo.h>
+#include <asm/bootinfo-vme.h>
 #include <asm/pgtable.h>
 #include <asm/setup.h>
 #include <asm/irq.h>
-- 
cgit v1.2.3


From 799300840c18e7fdc0a3dace70d9b56a189fd1ab Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 2 Oct 2013 21:50:56 +0200
Subject: m68k/UAPI: Move generic definitions to <asm/bootinfo.h>

Move generic definitions used by bootstraps to uapi/asm/bootinfo.h:
  - Machine types,
  - CPU, FPU, and MMU types,
  - struct mem_info.

Keep a copy of struct mem_info for in-kernel use, and rename it to struct
m68k_mem_info, as the exported one will be modified later.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/include/asm/setup.h         |  5 +-
 arch/m68k/include/uapi/asm/bootinfo.h | 80 +++++++++++++++++++++++++++++++-
 arch/m68k/include/uapi/asm/setup.h    | 87 -----------------------------------
 arch/m68k/kernel/setup_mm.c           |  4 +-
 arch/m68k/mm/init.c                   |  2 +-
 arch/m68k/mm/motorola.c               |  2 +-
 6 files changed, 85 insertions(+), 95 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/include/asm/setup.h b/arch/m68k/include/asm/setup.h
index 65e78a2dad64..8f2023f8c1c4 100644
--- a/arch/m68k/include/asm/setup.h
+++ b/arch/m68k/include/asm/setup.h
@@ -22,6 +22,7 @@
 #ifndef _M68K_SETUP_H
 #define _M68K_SETUP_H
 
+#include <uapi/asm/bootinfo.h>
 #include <uapi/asm/setup.h>
 
 
@@ -297,14 +298,14 @@ extern int m68k_is040or060;
 #define NUM_MEMINFO	4
 
 #ifndef __ASSEMBLY__
-struct mem_info {
+struct m68k_mem_info {
 	unsigned long addr;		/* physical address of memory chunk */
 	unsigned long size;		/* length of memory chunk (in bytes) */
 };
 
 extern int m68k_num_memory;		/* # of memory blocks found (and used) */
 extern int m68k_realnum_memory;		/* real # of memory blocks found */
-extern struct mem_info m68k_memory[NUM_MEMINFO];/* memory description */
+extern struct m68k_mem_info m68k_memory[NUM_MEMINFO];/* memory description */
 #endif
 
 #endif /* _M68K_SETUP_H */
diff --git a/arch/m68k/include/uapi/asm/bootinfo.h b/arch/m68k/include/uapi/asm/bootinfo.h
index 74a6fae2a5bf..9d64599d7bf5 100644
--- a/arch/m68k/include/uapi/asm/bootinfo.h
+++ b/arch/m68k/include/uapi/asm/bootinfo.h
@@ -11,8 +11,6 @@
 #ifndef _UAPI_ASM_M68K_BOOTINFO_H
 #define _UAPI_ASM_M68K_BOOTINFO_H
 
-#include <linux/types.h>
-
 
 #ifndef __ASSEMBLY__
 
@@ -35,6 +33,12 @@ struct bi_record {
 	unsigned long data[0];		/* data */
 };
 
+
+struct mem_info {
+	unsigned long addr;		/* physical address of memory chunk */
+	unsigned long size;		/* length of memory chunk (in bytes) */
+};
+
 #endif /* __ASSEMBLY__ */
 
 
@@ -58,6 +62,78 @@ struct bi_record {
 					/* (string) */
 
 
+    /*
+     *  Linux/m68k Architectures (BI_MACHTYPE)
+     */
+
+#define MACH_AMIGA		1
+#define MACH_ATARI		2
+#define MACH_MAC		3
+#define MACH_APOLLO		4
+#define MACH_SUN3		5
+#define MACH_MVME147		6
+#define MACH_MVME16x		7
+#define MACH_BVME6000		8
+#define MACH_HP300		9
+#define MACH_Q40		10
+#define MACH_SUN3X		11
+#define MACH_M54XX		12
+
+
+    /*
+     *  CPU, FPU and MMU types (BI_CPUTYPE, BI_FPUTYPE, BI_MMUTYPE)
+     *
+     *  Note: we may rely on the following equalities:
+     *
+     *      CPU_68020 == MMU_68851
+     *      CPU_68030 == MMU_68030
+     *      CPU_68040 == FPU_68040 == MMU_68040
+     *      CPU_68060 == FPU_68060 == MMU_68060
+     */
+
+#define CPUB_68020		0
+#define CPUB_68030		1
+#define CPUB_68040		2
+#define CPUB_68060		3
+#define CPUB_COLDFIRE		4
+
+#define CPU_68020		(1 << CPUB_68020)
+#define CPU_68030		(1 << CPUB_68030)
+#define CPU_68040		(1 << CPUB_68040)
+#define CPU_68060		(1 << CPUB_68060)
+#define CPU_COLDFIRE		(1 << CPUB_COLDFIRE)
+
+#define FPUB_68881		0
+#define FPUB_68882		1
+#define FPUB_68040		2	/* Internal FPU */
+#define FPUB_68060		3	/* Internal FPU */
+#define FPUB_SUNFPA		4	/* Sun-3 FPA */
+#define FPUB_COLDFIRE		5	/* ColdFire FPU */
+
+#define FPU_68881		(1 << FPUB_68881)
+#define FPU_68882		(1 << FPUB_68882)
+#define FPU_68040		(1 << FPUB_68040)
+#define FPU_68060		(1 << FPUB_68060)
+#define FPU_SUNFPA		(1 << FPUB_SUNFPA)
+#define FPU_COLDFIRE		(1 << FPUB_COLDFIRE)
+
+#define MMUB_68851		0
+#define MMUB_68030		1	/* Internal MMU */
+#define MMUB_68040		2	/* Internal MMU */
+#define MMUB_68060		3	/* Internal MMU */
+#define MMUB_APOLLO		4	/* Custom Apollo */
+#define MMUB_SUN3		5	/* Custom Sun-3 */
+#define MMUB_COLDFIRE		6	/* Internal MMU */
+
+#define MMU_68851		(1 << MMUB_68851)
+#define MMU_68030		(1 << MMUB_68030)
+#define MMU_68040		(1 << MMUB_68040)
+#define MMU_68060		(1 << MMUB_68060)
+#define MMU_SUN3		(1 << MMUB_SUN3)
+#define MMU_APOLLO		(1 << MMUB_APOLLO)
+#define MMU_COLDFIRE		(1 << MMUB_COLDFIRE)
+
+
     /*
      * Stuff for bootinfo interface versioning
      *
diff --git a/arch/m68k/include/uapi/asm/setup.h b/arch/m68k/include/uapi/asm/setup.h
index 85579bff455c..6a6dc636761e 100644
--- a/arch/m68k/include/uapi/asm/setup.h
+++ b/arch/m68k/include/uapi/asm/setup.h
@@ -6,98 +6,11 @@
 ** This file is subject to the terms and conditions of the GNU General Public
 ** License.  See the file COPYING in the main directory of this archive
 ** for more details.
-**
-** Created 09/29/92 by Greg Harp
-**
-** 5/2/94 Roman Hodek:
-**   Added bi_atari part of the machine dependent union bi_un; for now it
-**   contains just a model field to distinguish between TT and Falcon.
-** 26/7/96 Roman Zippel:
-**   Renamed to setup.h; added some useful macros to allow gcc some
-**   optimizations if possible.
-** 5/10/96 Geert Uytterhoeven:
-**   Redesign of the boot information structure; moved boot information
-**   structure to bootinfo.h
 */
 
 #ifndef _UAPI_M68K_SETUP_H
 #define _UAPI_M68K_SETUP_H
 
-
-
-    /*
-     *  Linux/m68k Architectures
-     */
-
-#define MACH_AMIGA    1
-#define MACH_ATARI    2
-#define MACH_MAC      3
-#define MACH_APOLLO   4
-#define MACH_SUN3     5
-#define MACH_MVME147  6
-#define MACH_MVME16x  7
-#define MACH_BVME6000 8
-#define MACH_HP300    9
-#define MACH_Q40     10
-#define MACH_SUN3X   11
-#define MACH_M54XX   12
-
 #define COMMAND_LINE_SIZE 256
 
-
-
-    /*
-     *  CPU, FPU and MMU types
-     *
-     *  Note: we may rely on the following equalities:
-     *
-     *      CPU_68020 == MMU_68851
-     *      CPU_68030 == MMU_68030
-     *      CPU_68040 == FPU_68040 == MMU_68040
-     *      CPU_68060 == FPU_68060 == MMU_68060
-     */
-
-#define CPUB_68020     0
-#define CPUB_68030     1
-#define CPUB_68040     2
-#define CPUB_68060     3
-#define CPUB_COLDFIRE  4
-
-#define CPU_68020      (1<<CPUB_68020)
-#define CPU_68030      (1<<CPUB_68030)
-#define CPU_68040      (1<<CPUB_68040)
-#define CPU_68060      (1<<CPUB_68060)
-#define CPU_COLDFIRE   (1<<CPUB_COLDFIRE)
-
-#define FPUB_68881     0
-#define FPUB_68882     1
-#define FPUB_68040     2                       /* Internal FPU */
-#define FPUB_68060     3                       /* Internal FPU */
-#define FPUB_SUNFPA    4                       /* Sun-3 FPA */
-#define FPUB_COLDFIRE  5                       /* ColdFire FPU */
-
-#define FPU_68881      (1<<FPUB_68881)
-#define FPU_68882      (1<<FPUB_68882)
-#define FPU_68040      (1<<FPUB_68040)
-#define FPU_68060      (1<<FPUB_68060)
-#define FPU_SUNFPA     (1<<FPUB_SUNFPA)
-#define FPU_COLDFIRE   (1<<FPUB_COLDFIRE)
-
-#define MMUB_68851     0
-#define MMUB_68030     1                       /* Internal MMU */
-#define MMUB_68040     2                       /* Internal MMU */
-#define MMUB_68060     3                       /* Internal MMU */
-#define MMUB_APOLLO    4                       /* Custom Apollo */
-#define MMUB_SUN3      5                       /* Custom Sun-3 */
-#define MMUB_COLDFIRE  6                       /* Internal MMU */
-
-#define MMU_68851      (1<<MMUB_68851)
-#define MMU_68030      (1<<MMUB_68030)
-#define MMU_68040      (1<<MMUB_68040)
-#define MMU_68060      (1<<MMUB_68060)
-#define MMU_SUN3       (1<<MMUB_SUN3)
-#define MMU_APOLLO     (1<<MMUB_APOLLO)
-#define MMU_COLDFIRE   (1<<MMUB_COLDFIRE)
-
-
 #endif /* _UAPI_M68K_SETUP_H */
diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c
index 18423a843379..cff9845708c0 100644
--- a/arch/m68k/kernel/setup_mm.c
+++ b/arch/m68k/kernel/setup_mm.c
@@ -71,10 +71,10 @@ EXPORT_SYMBOL(m68k_num_memory);
 int m68k_realnum_memory;
 EXPORT_SYMBOL(m68k_realnum_memory);
 unsigned long m68k_memoffset;
-struct mem_info m68k_memory[NUM_MEMINFO];
+struct m68k_mem_info m68k_memory[NUM_MEMINFO];
 EXPORT_SYMBOL(m68k_memory);
 
-static struct mem_info m68k_ramdisk __initdata;
+static struct m68k_mem_info m68k_ramdisk __initdata;
 
 static char m68k_command_line[CL_SIZE] __initdata;
 
diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c
index 6b4baa6e4d31..acaff6a49e35 100644
--- a/arch/m68k/mm/init.c
+++ b/arch/m68k/mm/init.c
@@ -59,7 +59,7 @@ EXPORT_SYMBOL(pg_data_table);
 void __init m68k_setup_node(int node)
 {
 #ifndef CONFIG_SINGLE_MEMORY_CHUNK
-	struct mem_info *info = m68k_memory + node;
+	struct m68k_mem_info *info = m68k_memory + node;
 	int i, end;
 
 	i = (unsigned long)phys_to_virt(info->addr) >> __virt_to_node_shift();
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index 251c5437787b..7d4024432163 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -233,7 +233,7 @@ void __init paging_init(void)
 			printk("Fix your bootloader or use a memfile to make use of this area!\n");
 			m68k_num_memory--;
 			memmove(m68k_memory + i, m68k_memory + i + 1,
-				(m68k_num_memory - i) * sizeof(struct mem_info));
+				(m68k_num_memory - i) * sizeof(struct m68k_mem_info));
 			continue;
 		}
 		addr = m68k_memory[i].addr + m68k_memory[i].size;
-- 
cgit v1.2.3


From 7bc449688bf73891625616f3f2e93e359603016b Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Thu, 3 Oct 2013 11:26:22 +0200
Subject: m68k/UAPI: Move Amiga model/chipset definitions to
 <asm/bootinfo-amiga.h>

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/include/asm/amigahw.h             | 26 +----------------------
 arch/m68k/include/uapi/asm/bootinfo-amiga.h | 32 +++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 25 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/include/asm/amigahw.h b/arch/m68k/include/asm/amigahw.h
index 3bef0b2ad945..5ad568110f17 100644
--- a/arch/m68k/include/asm/amigahw.h
+++ b/arch/m68k/include/asm/amigahw.h
@@ -18,26 +18,7 @@
 
 #include <linux/ioport.h>
 
-    /*
-     *  Different Amiga models
-     */
-
-#define AMI_UNKNOWN	(0)
-#define AMI_500		(1)
-#define AMI_500PLUS	(2)
-#define AMI_600		(3)
-#define AMI_1000	(4)
-#define AMI_1200	(5)
-#define AMI_2000	(6)
-#define AMI_2500	(7)
-#define AMI_3000	(8)
-#define AMI_3000T	(9)
-#define AMI_3000PLUS	(10)
-#define AMI_4000	(11)
-#define AMI_4000T	(12)
-#define AMI_CDTV	(13)
-#define AMI_CD32	(14)
-#define AMI_DRACO	(15)
+#include <asm/bootinfo-amiga.h>
 
 
     /*
@@ -46,11 +27,6 @@
 
 extern unsigned long amiga_chipset;
 
-#define CS_STONEAGE	(0)
-#define CS_OCS		(1)
-#define CS_ECS		(2)
-#define CS_AGA		(3)
-
 
     /*
      *  Miscellaneous
diff --git a/arch/m68k/include/uapi/asm/bootinfo-amiga.h b/arch/m68k/include/uapi/asm/bootinfo-amiga.h
index 2c32d671f232..28b6da07874a 100644
--- a/arch/m68k/include/uapi/asm/bootinfo-amiga.h
+++ b/arch/m68k/include/uapi/asm/bootinfo-amiga.h
@@ -21,6 +21,38 @@
 #define BI_AMIGA_SERPER		0x8007	/* serial port period (u_short) */
 
 
+    /*
+     *  Amiga models (BI_AMIGA_MODEL)
+     */
+
+#define AMI_UNKNOWN		0
+#define AMI_500			1
+#define AMI_500PLUS		2
+#define AMI_600			3
+#define AMI_1000		4
+#define AMI_1200		5
+#define AMI_2000		6
+#define AMI_2500		7
+#define AMI_3000		8
+#define AMI_3000T		9
+#define AMI_3000PLUS		10
+#define AMI_4000		11
+#define AMI_4000T		12
+#define AMI_CDTV		13
+#define AMI_CD32		14
+#define AMI_DRACO		15
+
+
+    /*
+     *  Amiga chipsets (BI_AMIGA_CHIPSET)
+     */
+
+#define CS_STONEAGE		0
+#define CS_OCS			1
+#define CS_ECS			2
+#define CS_AGA			3
+
+
     /*
      *  Latest Amiga bootinfo version
      */
-- 
cgit v1.2.3


From 7678e77d2a2fb2dd74a27b8283e030c580294a62 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Thu, 3 Oct 2013 11:31:08 +0200
Subject: m68k/UAPI: Move Apollo model definitions to <asm/bootinfo-apollo.h>

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/include/asm/apollohw.h             | 11 ++---------
 arch/m68k/include/uapi/asm/bootinfo-apollo.h | 12 ++++++++++++
 2 files changed, 14 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/include/asm/apollohw.h b/arch/m68k/include/asm/apollohw.h
index 6c19e0c22411..87fc899d32ee 100644
--- a/arch/m68k/include/asm/apollohw.h
+++ b/arch/m68k/include/asm/apollohw.h
@@ -5,18 +5,11 @@
 
 #include <linux/types.h>
 
-/*
-   apollo models
-*/
+#include <asm/bootinfo-apollo.h>
+
 
 extern u_long apollo_model;
 
-#define APOLLO_UNKNOWN (0)
-#define APOLLO_DN3000 (1)
-#define APOLLO_DN3010 (2)
-#define APOLLO_DN3500 (3)
-#define APOLLO_DN4000 (4)
-#define APOLLO_DN4500 (5)
 
 /*
    see scn2681 data sheet for more info.
diff --git a/arch/m68k/include/uapi/asm/bootinfo-apollo.h b/arch/m68k/include/uapi/asm/bootinfo-apollo.h
index 3a2051e822b0..69923697e131 100644
--- a/arch/m68k/include/uapi/asm/bootinfo-apollo.h
+++ b/arch/m68k/include/uapi/asm/bootinfo-apollo.h
@@ -13,4 +13,16 @@
 #define BI_APOLLO_MODEL		0x8000	/* model (u_long) */
 
 
+    /*
+     *  Apollo models (BI_APOLLO_MODEL)
+     */
+
+#define APOLLO_UNKNOWN		0
+#define APOLLO_DN3000		1
+#define APOLLO_DN3010		2
+#define APOLLO_DN3500		3
+#define APOLLO_DN4000		4
+#define APOLLO_DN4500		5
+
+
 #endif /* _UAPI_ASM_M68K_BOOTINFO_APOLLO_H */
-- 
cgit v1.2.3


From f3bd09e3dba76e76bec7e79a442f37b0762f4fa1 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Thu, 3 Oct 2013 11:36:52 +0200
Subject: m68k/UAPI: Move HP300 model definitions to <asm/bootinfo-hp300.h>

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/include/asm/hp300hw.h             | 20 ++------------------
 arch/m68k/include/uapi/asm/bootinfo-hp300.h | 25 +++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 18 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/include/asm/hp300hw.h b/arch/m68k/include/asm/hp300hw.h
index d998ea67c19c..64f5271dd7be 100644
--- a/arch/m68k/include/asm/hp300hw.h
+++ b/arch/m68k/include/asm/hp300hw.h
@@ -1,25 +1,9 @@
 #ifndef _M68K_HP300HW_H
 #define _M68K_HP300HW_H
 
-extern unsigned long hp300_model;
+#include <asm/bootinfo-hp300.h>
 
-/* This information was taken from NetBSD */
-#define	HP_320		(0)	/* 16MHz 68020+HP MMU+16K external cache */
-#define	HP_330		(1)	/* 16MHz 68020+68851 MMU */
-#define	HP_340		(2)	/* 16MHz 68030 */
-#define	HP_345		(3)	/* 50MHz 68030+32K external cache */
-#define	HP_350		(4)	/* 25MHz 68020+HP MMU+32K external cache */
-#define	HP_360		(5)	/* 25MHz 68030 */
-#define	HP_370		(6)	/* 33MHz 68030+64K external cache */
-#define	HP_375		(7)	/* 50MHz 68030+32K external cache */
-#define	HP_380		(8)	/* 25MHz 68040 */
-#define	HP_385		(9)	/* 33MHz 68040 */
 
-#define	HP_400		(10)	/* 50MHz 68030+32K external cache */
-#define	HP_425T		(11)	/* 25MHz 68040 - model 425t */
-#define	HP_425S		(12)	/* 25MHz 68040 - model 425s */
-#define HP_425E		(13)	/* 25MHz 68040 - model 425e */
-#define HP_433T		(14)	/* 33MHz 68040 - model 433t */
-#define HP_433S		(15)	/* 33MHz 68040 - model 433s */
+extern unsigned long hp300_model;
 
 #endif /* _M68K_HP300HW_H */
diff --git a/arch/m68k/include/uapi/asm/bootinfo-hp300.h b/arch/m68k/include/uapi/asm/bootinfo-hp300.h
index e7ed19cdafe5..08530e12d4c3 100644
--- a/arch/m68k/include/uapi/asm/bootinfo-hp300.h
+++ b/arch/m68k/include/uapi/asm/bootinfo-hp300.h
@@ -15,6 +15,31 @@
 #define BI_HP300_UART_ADDR	0x8002	/* phys. addr of UART (u_long) */
 
 
+    /*
+     *  HP9000/300 and /400 models (BI_HP300_MODEL)
+     *
+     * This information was taken from NetBSD
+     */
+
+#define HP_320		0	/* 16MHz 68020+HP MMU+16K external cache */
+#define HP_330		1	/* 16MHz 68020+68851 MMU */
+#define HP_340		2	/* 16MHz 68030 */
+#define HP_345		3	/* 50MHz 68030+32K external cache */
+#define HP_350		4	/* 25MHz 68020+HP MMU+32K external cache */
+#define HP_360		5	/* 25MHz 68030 */
+#define HP_370		6	/* 33MHz 68030+64K external cache */
+#define HP_375		7	/* 50MHz 68030+32K external cache */
+#define HP_380		8	/* 25MHz 68040 */
+#define HP_385		9	/* 33MHz 68040 */
+
+#define HP_400		10	/* 50MHz 68030+32K external cache */
+#define HP_425T		11	/* 25MHz 68040 - model 425t */
+#define HP_425S		12	/* 25MHz 68040 - model 425s */
+#define HP_425E		13	/* 25MHz 68040 - model 425e */
+#define HP_433T		14	/* 33MHz 68040 - model 433t */
+#define HP_433S		15	/* 33MHz 68040 - model 433s */
+
+
     /*
      *  Latest HP9000/300 bootinfo version
      */
-- 
cgit v1.2.3


From 8693d6167e16baab1ee900d94b16af586a26a916 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Thu, 3 Oct 2013 11:40:00 +0200
Subject: m68k/UAPI: Move Macintosh model definitions to <asm/bootinfo-mac.h>

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/include/asm/macintosh.h         | 62 ++-----------------------------
 arch/m68k/include/uapi/asm/bootinfo-mac.h | 60 ++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+), 59 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/include/asm/macintosh.h b/arch/m68k/include/asm/macintosh.h
index 6a633eb64331..d323b2c2d07d 100644
--- a/arch/m68k/include/asm/macintosh.h
+++ b/arch/m68k/include/asm/macintosh.h
@@ -4,6 +4,9 @@
 #include <linux/seq_file.h>
 #include <linux/interrupt.h>
 
+#include <asm/bootinfo-mac.h>
+
+
 /*
  *	Apple Macintoshisms
  */
@@ -74,65 +77,6 @@ struct mac_model
 #define MAC_FLOPPY_SWIM_IOP	3
 #define MAC_FLOPPY_AV		4
 
-/*
- *	Gestalt numbers
- */
-
-#define MAC_MODEL_II		6
-#define MAC_MODEL_IIX		7
-#define MAC_MODEL_IICX		8
-#define MAC_MODEL_SE30		9
-#define MAC_MODEL_IICI		11
-#define MAC_MODEL_IIFX		13	/* And well numbered it is too */
-#define MAC_MODEL_IISI		18
-#define MAC_MODEL_LC		19
-#define MAC_MODEL_Q900		20
-#define MAC_MODEL_PB170		21
-#define MAC_MODEL_Q700		22
-#define MAC_MODEL_CLII		23	/* aka: P200 */
-#define MAC_MODEL_PB140		25
-#define MAC_MODEL_Q950		26	/* aka: WGS95 */
-#define MAC_MODEL_LCIII		27	/* aka: P450 */
-#define MAC_MODEL_PB210		29
-#define MAC_MODEL_C650		30
-#define MAC_MODEL_PB230		32
-#define MAC_MODEL_PB180		33
-#define MAC_MODEL_PB160		34
-#define MAC_MODEL_Q800		35	/* aka: WGS80 */
-#define MAC_MODEL_Q650		36
-#define MAC_MODEL_LCII		37	/* aka: P400/405/410/430 */
-#define MAC_MODEL_PB250		38
-#define MAC_MODEL_IIVI		44
-#define MAC_MODEL_P600		45	/* aka: P600CD */
-#define MAC_MODEL_IIVX		48
-#define MAC_MODEL_CCL		49	/* aka: P250 */
-#define MAC_MODEL_PB165C	50
-#define MAC_MODEL_C610		52	/* aka: WGS60 */
-#define MAC_MODEL_Q610		53
-#define MAC_MODEL_PB145		54	/* aka: PB145B */
-#define MAC_MODEL_P520		56	/* aka: LC520 */
-#define MAC_MODEL_C660		60
-#define MAC_MODEL_P460		62	/* aka: LCIII+, P466/P467 */
-#define MAC_MODEL_PB180C	71
-#define MAC_MODEL_PB520		72	/* aka: PB520C, PB540, PB540C, PB550C */
-#define MAC_MODEL_PB270C	77
-#define MAC_MODEL_Q840		78
-#define MAC_MODEL_P550		80	/* aka: LC550, P560 */
-#define MAC_MODEL_CCLII		83	/* aka: P275 */
-#define MAC_MODEL_PB165		84
-#define MAC_MODEL_PB190		85	/* aka: PB190CS */
-#define MAC_MODEL_TV		88
-#define MAC_MODEL_P475		89	/* aka: LC475, P476 */
-#define MAC_MODEL_P475F		90	/* aka: P475 w/ FPU (no LC040) */
-#define MAC_MODEL_P575		92	/* aka: LC575, P577/P578 */
-#define MAC_MODEL_Q605		94
-#define MAC_MODEL_Q605_ACC	95	/* Q605 accelerated to 33 MHz */
-#define MAC_MODEL_Q630		98	/* aka: LC630, P630/631/635/636/637/638/640 */
-#define MAC_MODEL_P588		99	/* aka: LC580, P580 */
-#define MAC_MODEL_PB280		102
-#define MAC_MODEL_PB280C	103
-#define MAC_MODEL_PB150		115
-
 extern struct mac_model *macintosh_config;
 
 
diff --git a/arch/m68k/include/uapi/asm/bootinfo-mac.h b/arch/m68k/include/uapi/asm/bootinfo-mac.h
index d4ef2115c8f9..971046f49bb9 100644
--- a/arch/m68k/include/uapi/asm/bootinfo-mac.h
+++ b/arch/m68k/include/uapi/asm/bootinfo-mac.h
@@ -49,6 +49,66 @@
 #define BI_MAC_IOP_ADB		0x8021	/* Mac ADB IOP */
 
 
+    /*
+     * Macintosh Gestalt numbers (BI_MAC_MODEL)
+     */
+
+#define MAC_MODEL_II		6
+#define MAC_MODEL_IIX		7
+#define MAC_MODEL_IICX		8
+#define MAC_MODEL_SE30		9
+#define MAC_MODEL_IICI		11
+#define MAC_MODEL_IIFX		13	/* And well numbered it is too */
+#define MAC_MODEL_IISI		18
+#define MAC_MODEL_LC		19
+#define MAC_MODEL_Q900		20
+#define MAC_MODEL_PB170		21
+#define MAC_MODEL_Q700		22
+#define MAC_MODEL_CLII		23	/* aka: P200 */
+#define MAC_MODEL_PB140		25
+#define MAC_MODEL_Q950		26	/* aka: WGS95 */
+#define MAC_MODEL_LCIII		27	/* aka: P450 */
+#define MAC_MODEL_PB210		29
+#define MAC_MODEL_C650		30
+#define MAC_MODEL_PB230		32
+#define MAC_MODEL_PB180		33
+#define MAC_MODEL_PB160		34
+#define MAC_MODEL_Q800		35	/* aka: WGS80 */
+#define MAC_MODEL_Q650		36
+#define MAC_MODEL_LCII		37	/* aka: P400/405/410/430 */
+#define MAC_MODEL_PB250		38
+#define MAC_MODEL_IIVI		44
+#define MAC_MODEL_P600		45	/* aka: P600CD */
+#define MAC_MODEL_IIVX		48
+#define MAC_MODEL_CCL		49	/* aka: P250 */
+#define MAC_MODEL_PB165C	50
+#define MAC_MODEL_C610		52	/* aka: WGS60 */
+#define MAC_MODEL_Q610		53
+#define MAC_MODEL_PB145		54	/* aka: PB145B */
+#define MAC_MODEL_P520		56	/* aka: LC520 */
+#define MAC_MODEL_C660		60
+#define MAC_MODEL_P460		62	/* aka: LCIII+, P466/P467 */
+#define MAC_MODEL_PB180C	71
+#define MAC_MODEL_PB520		72	/* aka: PB520C, PB540, PB540C, PB550C */
+#define MAC_MODEL_PB270C	77
+#define MAC_MODEL_Q840		78
+#define MAC_MODEL_P550		80	/* aka: LC550, P560 */
+#define MAC_MODEL_CCLII		83	/* aka: P275 */
+#define MAC_MODEL_PB165		84
+#define MAC_MODEL_PB190		85	/* aka: PB190CS */
+#define MAC_MODEL_TV		88
+#define MAC_MODEL_P475		89	/* aka: LC475, P476 */
+#define MAC_MODEL_P475F		90	/* aka: P475 w/ FPU (no LC040) */
+#define MAC_MODEL_P575		92	/* aka: LC575, P577/P578 */
+#define MAC_MODEL_Q605		94
+#define MAC_MODEL_Q605_ACC	95	/* Q605 accelerated to 33 MHz */
+#define MAC_MODEL_Q630		98	/* aka: LC630, P630/631/635/636/637/638/640 */
+#define MAC_MODEL_P588		99	/* aka: LC580, P580 */
+#define MAC_MODEL_PB280		102
+#define MAC_MODEL_PB280C	103
+#define MAC_MODEL_PB150		115
+
+
     /*
      *  Latest Macintosh bootinfo version
      */
-- 
cgit v1.2.3


From cf288bd5b122d12476fd7d9825c292daef5dba58 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 2 Oct 2013 22:40:44 +0200
Subject: m68k/UAPI: Move VME Board ID definition to <asm/bootinfo-vme.h>

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/include/asm/mvme16xhw.h         | 15 ---------------
 arch/m68k/include/uapi/asm/bootinfo-vme.h | 25 +++++++++++++++++++++++--
 2 files changed, 23 insertions(+), 17 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/include/asm/mvme16xhw.h b/arch/m68k/include/asm/mvme16xhw.h
index 87fa84ad252e..1eb89de631e5 100644
--- a/arch/m68k/include/asm/mvme16xhw.h
+++ b/arch/m68k/include/asm/mvme16xhw.h
@@ -3,21 +3,6 @@
 
 #include <asm/irq.h>
 
-/* Board ID data structure - pointer to this retrieved from Bug by head.S */
-
-/* Note, bytes 12 and 13 are board no in BCD (0162,0166,0167,0177,etc) */
-
-typedef struct {
-	char	bdid[4];
-	u_char	rev, mth, day, yr;
-	u_short	size, reserved;
-	u_short	brdno;
-	char brdsuffix[2];
-	u_long	options;
-	u_short	clun, dlun, ctype, dnum;
-	u_long	option2;
-} t_bdid, *p_bdid;
-
 
 typedef struct {
 	u_char	ack_icr,
diff --git a/arch/m68k/include/uapi/asm/bootinfo-vme.h b/arch/m68k/include/uapi/asm/bootinfo-vme.h
index 4643a55ed768..13ba5e19fe24 100644
--- a/arch/m68k/include/uapi/asm/bootinfo-vme.h
+++ b/arch/m68k/include/uapi/asm/bootinfo-vme.h
@@ -28,11 +28,32 @@
 #define VME_TYPE_BVME4000	0x4000	/* BVM Ltd. BVME4000 */
 #define VME_TYPE_BVME6000	0x6000	/* BVM Ltd. BVME6000 */
 
-/* BI_VME_BRDINFO is a 32 byte struct as returned by the Bug code on
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Board ID data structure - pointer to this retrieved from Bug by head.S
+ *
+ * BI_VME_BRDINFO is a 32 byte struct as returned by the Bug code on
  * Motorola VME boards.  Contains board number, Bug version, board
- * configuration options, etc.  See include/asm/mvme16xhw.h for details.
+ * configuration options, etc.
+ *
+ * Note, bytes 12 and 13 are board no in BCD (0162,0166,0167,0177,etc)
  */
 
+typedef struct {
+	char	bdid[4];
+	u_char	rev, mth, day, yr;
+	u_short	size, reserved;
+	u_short	brdno;
+	char	brdsuffix[2];
+	u_long	options;
+	u_short	clun, dlun, ctype, dnum;
+	u_long	option2;
+} t_bdid, *p_bdid;
+
+#endif /* __ASSEMBLY__ */
+
 
     /*
      *  Latest VME bootinfo versions
-- 
cgit v1.2.3


From abe48101c17eaf1b5d85270272392e6111562626 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Fri, 4 Oct 2013 11:41:24 +0200
Subject: m68k/UAPI: Use proper types (endianness/size) in <asm/bootinfo*.h>

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/amiga/config.c                     | 18 ++++++++--------
 arch/m68k/apollo/config.c                    | 15 +++++++------
 arch/m68k/atari/config.c                     |  9 ++++----
 arch/m68k/bvme6000/config.c                  |  3 ++-
 arch/m68k/hp300/config.c                     |  9 ++++----
 arch/m68k/include/uapi/asm/bootinfo-amiga.h  | 14 ++++++------
 arch/m68k/include/uapi/asm/bootinfo-apollo.h |  2 +-
 arch/m68k/include/uapi/asm/bootinfo-atari.h  |  4 ++--
 arch/m68k/include/uapi/asm/bootinfo-hp300.h  |  6 +++---
 arch/m68k/include/uapi/asm/bootinfo-mac.h    |  2 +-
 arch/m68k/include/uapi/asm/bootinfo-vme.h    | 17 +++++++++------
 arch/m68k/include/uapi/asm/bootinfo.h        | 29 ++++++++++++++-----------
 arch/m68k/kernel/setup_mm.c                  | 31 +++++++++++++++++----------
 arch/m68k/mac/config.c                       | 32 +++++++++++++++-------------
 arch/m68k/mvme147/config.c                   |  4 +++-
 arch/m68k/mvme16x/config.c                   | 27 ++++++++++++-----------
 16 files changed, 124 insertions(+), 98 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c
index 0c92c1baf9bf..06920e85fac9 100644
--- a/arch/m68k/amiga/config.c
+++ b/arch/m68k/amiga/config.c
@@ -145,37 +145,37 @@ static struct resource ram_resource[NUM_MEMINFO];
 int __init amiga_parse_bootinfo(const struct bi_record *record)
 {
 	int unknown = 0;
-	const unsigned long *data = record->data;
+	const void *data = record->data;
 
-	switch (record->tag) {
+	switch (be16_to_cpu(record->tag)) {
 	case BI_AMIGA_MODEL:
-		amiga_model = *data;
+		amiga_model = be32_to_cpup(data);
 		break;
 
 	case BI_AMIGA_ECLOCK:
-		amiga_eclock = *data;
+		amiga_eclock = be32_to_cpup(data);
 		break;
 
 	case BI_AMIGA_CHIPSET:
-		amiga_chipset = *data;
+		amiga_chipset = be32_to_cpup(data);
 		break;
 
 	case BI_AMIGA_CHIP_SIZE:
-		amiga_chip_size = *(const int *)data;
+		amiga_chip_size = be32_to_cpup(data);
 		break;
 
 	case BI_AMIGA_VBLANK:
-		amiga_vblank = *(const unsigned char *)data;
+		amiga_vblank = *(const __u8 *)data;
 		break;
 
 	case BI_AMIGA_PSFREQ:
-		amiga_psfreq = *(const unsigned char *)data;
+		amiga_psfreq = *(const __u8 *)data;
 		break;
 
 	case BI_AMIGA_AUTOCON:
 #ifdef CONFIG_ZORRO
 		if (zorro_num_autocon < ZORRO_NUM_AUTO) {
-			const struct ConfigDev *cd = (struct ConfigDev *)data;
+			const struct ConfigDev *cd = data;
 			struct zorro_dev_init *dev = &zorro_autocon_init[zorro_num_autocon++];
 			dev->rom = cd->cd_Rom;
 			dev->slotaddr = be16_to_cpu(cd->cd_SlotAddr);
diff --git a/arch/m68k/apollo/config.c b/arch/m68k/apollo/config.c
index c90c19e904bc..9268c0f96376 100644
--- a/arch/m68k/apollo/config.c
+++ b/arch/m68k/apollo/config.c
@@ -11,6 +11,7 @@
 #include <asm/setup.h>
 #include <asm/bootinfo.h>
 #include <asm/bootinfo-apollo.h>
+#include <asm/byteorder.h>
 #include <asm/pgtable.h>
 #include <asm/apollohw.h>
 #include <asm/irq.h>
@@ -48,15 +49,15 @@ static const char *apollo_models[] = {
 int __init apollo_parse_bootinfo(const struct bi_record *record)
 {
 	int unknown = 0;
-	const unsigned long *data = record->data;
+	const void *data = record->data;
 
-	switch(record->tag) {
-		case BI_APOLLO_MODEL:
-			apollo_model=*data;
-			break;
+	switch (be16_to_cpu(record->tag)) {
+	case BI_APOLLO_MODEL:
+		apollo_model = be32_to_cpup(data);
+		break;
 
-		default:
-			 unknown=1;
+	default:
+		 unknown=1;
 	}
 
 	return unknown;
diff --git a/arch/m68k/atari/config.c b/arch/m68k/atari/config.c
index 9159195505d5..01a62161b08a 100644
--- a/arch/m68k/atari/config.c
+++ b/arch/m68k/atari/config.c
@@ -38,6 +38,7 @@
 
 #include <asm/bootinfo.h>
 #include <asm/bootinfo-atari.h>
+#include <asm/byteorder.h>
 #include <asm/setup.h>
 #include <asm/atarihw.h>
 #include <asm/atariints.h>
@@ -130,14 +131,14 @@ static int __init scc_test(volatile char *ctla)
 int __init atari_parse_bootinfo(const struct bi_record *record)
 {
 	int unknown = 0;
-	const u_long *data = record->data;
+	const void *data = record->data;
 
-	switch (record->tag) {
+	switch (be16_to_cpu(record->tag)) {
 	case BI_ATARI_MCH_COOKIE:
-		atari_mch_cookie = *data;
+		atari_mch_cookie = be32_to_cpup(data);
 		break;
 	case BI_ATARI_MCH_TYPE:
-		atari_mch_type = *data;
+		atari_mch_type = be32_to_cpup(data);
 		break;
 	default:
 		unknown = 1;
diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c
index 4d1b403822fa..478623dbb209 100644
--- a/arch/m68k/bvme6000/config.c
+++ b/arch/m68k/bvme6000/config.c
@@ -29,6 +29,7 @@
 
 #include <asm/bootinfo.h>
 #include <asm/bootinfo-vme.h>
+#include <asm/byteorder.h>
 #include <asm/pgtable.h>
 #include <asm/setup.h>
 #include <asm/irq.h>
@@ -53,7 +54,7 @@ static irq_handler_t tick_handler;
 
 int __init bvme6000_parse_bootinfo(const struct bi_record *bi)
 {
-	if (bi->tag == BI_VME_TYPE)
+	if (be16_to_cpu(bi->tag) == BI_VME_TYPE)
 		return 0;
 	else
 		return 1;
diff --git a/arch/m68k/hp300/config.c b/arch/m68k/hp300/config.c
index 892d0794fc0b..2e5a787ea11b 100644
--- a/arch/m68k/hp300/config.c
+++ b/arch/m68k/hp300/config.c
@@ -15,6 +15,7 @@
 
 #include <asm/bootinfo.h>
 #include <asm/bootinfo-hp300.h>
+#include <asm/byteorder.h>
 #include <asm/machdep.h>
 #include <asm/blinken.h>
 #include <asm/io.h>                               /* readb() and writeb() */
@@ -71,15 +72,15 @@ extern int hp300_setup_serial_console(void) __init;
 int __init hp300_parse_bootinfo(const struct bi_record *record)
 {
 	int unknown = 0;
-	const unsigned long *data = record->data;
+	const void *data = record->data;
 
-	switch (record->tag) {
+	switch (be16_to_cpu(record->tag)) {
 	case BI_HP300_MODEL:
-		hp300_model = *data;
+		hp300_model = be32_to_cpup(data);
 		break;
 
 	case BI_HP300_UART_SCODE:
-		hp300_uart_scode = *data;
+		hp300_uart_scode = be32_to_cpup(data);
 		break;
 
 	case BI_HP300_UART_ADDR:
diff --git a/arch/m68k/include/uapi/asm/bootinfo-amiga.h b/arch/m68k/include/uapi/asm/bootinfo-amiga.h
index 28b6da07874a..daad3c58d2da 100644
--- a/arch/m68k/include/uapi/asm/bootinfo-amiga.h
+++ b/arch/m68k/include/uapi/asm/bootinfo-amiga.h
@@ -10,15 +10,15 @@
      *  Amiga-specific tags
      */
 
-#define BI_AMIGA_MODEL		0x8000	/* model (u_long) */
+#define BI_AMIGA_MODEL		0x8000	/* model (__be32) */
 #define BI_AMIGA_AUTOCON	0x8001	/* AutoConfig device */
 					/* (AmigaOS struct ConfigDev) */
-#define BI_AMIGA_CHIP_SIZE	0x8002	/* size of Chip RAM (u_long) */
-#define BI_AMIGA_VBLANK		0x8003	/* VBLANK frequency (u_char) */
-#define BI_AMIGA_PSFREQ		0x8004	/* power supply frequency (u_char) */
-#define BI_AMIGA_ECLOCK		0x8005	/* EClock frequency (u_long) */
-#define BI_AMIGA_CHIPSET	0x8006	/* native chipset present (u_long) */
-#define BI_AMIGA_SERPER		0x8007	/* serial port period (u_short) */
+#define BI_AMIGA_CHIP_SIZE	0x8002	/* size of Chip RAM (__be32) */
+#define BI_AMIGA_VBLANK		0x8003	/* VBLANK frequency (__u8) */
+#define BI_AMIGA_PSFREQ		0x8004	/* power supply frequency (__u8) */
+#define BI_AMIGA_ECLOCK		0x8005	/* EClock frequency (__be32) */
+#define BI_AMIGA_CHIPSET	0x8006	/* native chipset present (__be32) */
+#define BI_AMIGA_SERPER		0x8007	/* serial port period (__be16) */
 
 
     /*
diff --git a/arch/m68k/include/uapi/asm/bootinfo-apollo.h b/arch/m68k/include/uapi/asm/bootinfo-apollo.h
index 69923697e131..a93e0af1c6fe 100644
--- a/arch/m68k/include/uapi/asm/bootinfo-apollo.h
+++ b/arch/m68k/include/uapi/asm/bootinfo-apollo.h
@@ -10,7 +10,7 @@
      *  Apollo-specific tags
      */
 
-#define BI_APOLLO_MODEL		0x8000	/* model (u_long) */
+#define BI_APOLLO_MODEL		0x8000	/* model (__be32) */
 
 
     /*
diff --git a/arch/m68k/include/uapi/asm/bootinfo-atari.h b/arch/m68k/include/uapi/asm/bootinfo-atari.h
index cca0a83fc0e5..a817854049bb 100644
--- a/arch/m68k/include/uapi/asm/bootinfo-atari.h
+++ b/arch/m68k/include/uapi/asm/bootinfo-atari.h
@@ -10,8 +10,8 @@
      *  Atari-specific tags
      */
 
-#define BI_ATARI_MCH_COOKIE	0x8000	/* _MCH cookie from TOS (u_long) */
-#define BI_ATARI_MCH_TYPE	0x8001	/* special machine type (u_long) */
+#define BI_ATARI_MCH_COOKIE	0x8000	/* _MCH cookie from TOS (__be32) */
+#define BI_ATARI_MCH_TYPE	0x8001	/* special machine type (__be32) */
 
 
     /*
diff --git a/arch/m68k/include/uapi/asm/bootinfo-hp300.h b/arch/m68k/include/uapi/asm/bootinfo-hp300.h
index 08530e12d4c3..c90cb71ed89a 100644
--- a/arch/m68k/include/uapi/asm/bootinfo-hp300.h
+++ b/arch/m68k/include/uapi/asm/bootinfo-hp300.h
@@ -10,9 +10,9 @@
      *  HP9000/300-specific tags
      */
 
-#define BI_HP300_MODEL		0x8000	/* model (u_long) */
-#define BI_HP300_UART_SCODE	0x8001	/* UART select code (u_long) */
-#define BI_HP300_UART_ADDR	0x8002	/* phys. addr of UART (u_long) */
+#define BI_HP300_MODEL		0x8000	/* model (__be32) */
+#define BI_HP300_UART_SCODE	0x8001	/* UART select code (__be32) */
+#define BI_HP300_UART_ADDR	0x8002	/* phys. addr of UART (__be32) */
 
 
     /*
diff --git a/arch/m68k/include/uapi/asm/bootinfo-mac.h b/arch/m68k/include/uapi/asm/bootinfo-mac.h
index 971046f49bb9..b44ff73898a9 100644
--- a/arch/m68k/include/uapi/asm/bootinfo-mac.h
+++ b/arch/m68k/include/uapi/asm/bootinfo-mac.h
@@ -7,7 +7,7 @@
 
 
     /*
-     *  Macintosh-specific tags (all u_long)
+     *  Macintosh-specific tags (all __be32)
      */
 
 #define BI_MAC_MODEL		0x8000	/* Mac Gestalt ID (model type) */
diff --git a/arch/m68k/include/uapi/asm/bootinfo-vme.h b/arch/m68k/include/uapi/asm/bootinfo-vme.h
index 13ba5e19fe24..a135eb41d672 100644
--- a/arch/m68k/include/uapi/asm/bootinfo-vme.h
+++ b/arch/m68k/include/uapi/asm/bootinfo-vme.h
@@ -6,11 +6,14 @@
 #define _UAPI_ASM_M68K_BOOTINFO_VME_H
 
 
+#include <linux/types.h>
+
+
     /*
      *  VME-specific tags
      */
 
-#define BI_VME_TYPE		0x8000	/* VME sub-architecture (u_long) */
+#define BI_VME_TYPE		0x8000	/* VME sub-architecture (__be32) */
 #define BI_VME_BRDINFO		0x8001	/* VME board information (struct) */
 
 
@@ -43,13 +46,13 @@
 
 typedef struct {
 	char	bdid[4];
-	u_char	rev, mth, day, yr;
-	u_short	size, reserved;
-	u_short	brdno;
+	__u8	rev, mth, day, yr;
+	__be16	size, reserved;
+	__be16	brdno;
 	char	brdsuffix[2];
-	u_long	options;
-	u_short	clun, dlun, ctype, dnum;
-	u_long	option2;
+	__be32	options;
+	__be16	clun, dlun, ctype, dnum;
+	__be32	option2;
 } t_bdid, *p_bdid;
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/m68k/include/uapi/asm/bootinfo.h b/arch/m68k/include/uapi/asm/bootinfo.h
index 9d64599d7bf5..cdeb26a015b0 100644
--- a/arch/m68k/include/uapi/asm/bootinfo.h
+++ b/arch/m68k/include/uapi/asm/bootinfo.h
@@ -12,6 +12,9 @@
 #define _UAPI_ASM_M68K_BOOTINFO_H
 
 
+#include <linux/types.h>
+
+
 #ifndef __ASSEMBLY__
 
     /*
@@ -28,15 +31,15 @@
      */
 
 struct bi_record {
-	unsigned short tag;		/* tag ID */
-	unsigned short size;		/* size of record (in bytes) */
-	unsigned long data[0];		/* data */
+	__be16 tag;			/* tag ID */
+	__be16 size;			/* size of record (in bytes) */
+	__be32 data[0];			/* data */
 };
 
 
 struct mem_info {
-	unsigned long addr;		/* physical address of memory chunk */
-	unsigned long size;		/* length of memory chunk (in bytes) */
+	__be32 addr;			/* physical address of memory chunk */
+	__be32 size;			/* length of memory chunk (in bytes) */
 };
 
 #endif /* __ASSEMBLY__ */
@@ -50,10 +53,10 @@ struct mem_info {
      */
 
 #define BI_LAST			0x0000	/* last record (sentinel) */
-#define BI_MACHTYPE		0x0001	/* machine type (u_long) */
-#define BI_CPUTYPE		0x0002	/* cpu type (u_long) */
-#define BI_FPUTYPE		0x0003	/* fpu type (u_long) */
-#define BI_MMUTYPE		0x0004	/* mmu type (u_long) */
+#define BI_MACHTYPE		0x0001	/* machine type (__be32) */
+#define BI_CPUTYPE		0x0002	/* cpu type (__be32) */
+#define BI_FPUTYPE		0x0003	/* fpu type (__be32) */
+#define BI_MMUTYPE		0x0004	/* mmu type (__be32) */
 #define BI_MEMCHUNK		0x0005	/* memory chunk address and size */
 					/* (struct mem_info) */
 #define BI_RAMDISK		0x0006	/* ramdisk address and size */
@@ -157,11 +160,11 @@ struct mem_info {
 #ifndef __ASSEMBLY__
 
 struct bootversion {
-	unsigned short branch;
-	unsigned long magic;
+	__be16 branch;
+	__be32 magic;
 	struct {
-		unsigned long machtype;
-		unsigned long version;
+		__be32 machtype;
+		__be32 version;
 	} machversions[0];
 } __packed;
 
diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c
index cff9845708c0..0191485d7b7d 100644
--- a/arch/m68k/kernel/setup_mm.c
+++ b/arch/m68k/kernel/setup_mm.c
@@ -26,6 +26,7 @@
 #include <linux/initrd.h>
 
 #include <asm/bootinfo.h>
+#include <asm/byteorder.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
 #include <asm/fpu.h>
@@ -143,11 +144,14 @@ extern void paging_init(void);
 
 static void __init m68k_parse_bootinfo(const struct bi_record *record)
 {
-	while (record->tag != BI_LAST) {
+	uint16_t tag;
+
+	while ((tag = be16_to_cpu(record->tag)) != BI_LAST) {
 		int unknown = 0;
-		const unsigned long *data = record->data;
+		const void *data = record->data;
+		uint16_t size = be16_to_cpu(record->size);
 
-		switch (record->tag) {
+		switch (tag) {
 		case BI_MACHTYPE:
 		case BI_CPUTYPE:
 		case BI_FPUTYPE:
@@ -157,8 +161,11 @@ static void __init m68k_parse_bootinfo(const struct bi_record *record)
 
 		case BI_MEMCHUNK:
 			if (m68k_num_memory < NUM_MEMINFO) {
-				m68k_memory[m68k_num_memory].addr = data[0];
-				m68k_memory[m68k_num_memory].size = data[1];
+				const struct mem_info *m = data;
+				m68k_memory[m68k_num_memory].addr =
+					be32_to_cpu(m->addr);
+				m68k_memory[m68k_num_memory].size =
+					be32_to_cpu(m->size);
 				m68k_num_memory++;
 			} else
 				pr_warn("%s: too many memory chunks\n",
@@ -166,12 +173,15 @@ static void __init m68k_parse_bootinfo(const struct bi_record *record)
 			break;
 
 		case BI_RAMDISK:
-			m68k_ramdisk.addr = data[0];
-			m68k_ramdisk.size = data[1];
+			{
+				const struct mem_info *m = data;
+				m68k_ramdisk.addr = be32_to_cpu(m->addr);
+				m68k_ramdisk.size = be32_to_cpu(m->size);
+			}
 			break;
 
 		case BI_COMMAND_LINE:
-			strlcpy(m68k_command_line, (const char *)data,
+			strlcpy(m68k_command_line, data,
 				sizeof(m68k_command_line));
 			break;
 
@@ -199,9 +209,8 @@ static void __init m68k_parse_bootinfo(const struct bi_record *record)
 		}
 		if (unknown)
 			pr_warn("%s: unknown tag 0x%04x ignored\n", __func__,
-				   record->tag);
-		record = (struct bi_record *)((unsigned long)record +
-					      record->size);
+				tag);
+		record = (struct bi_record *)((unsigned long)record + size);
 	}
 
 	m68k_realnum_memory = m68k_num_memory;
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index e48069da04ed..982c3fe73c4a 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -29,6 +29,7 @@
 #include <asm/setup.h>
 #include <asm/bootinfo.h>
 #include <asm/bootinfo-mac.h>
+#include <asm/byteorder.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
@@ -107,45 +108,46 @@ static void __init mac_sched_init(irq_handler_t vector)
 int __init mac_parse_bootinfo(const struct bi_record *record)
 {
 	int unknown = 0;
-	const u_long *data = record->data;
+	const void *data = record->data;
 
-	switch (record->tag) {
+	switch (be16_to_cpu(record->tag)) {
 	case BI_MAC_MODEL:
-		mac_bi_data.id = *data;
+		mac_bi_data.id = be32_to_cpup(data);
 		break;
 	case BI_MAC_VADDR:
-		mac_bi_data.videoaddr = *data;
+		mac_bi_data.videoaddr = be32_to_cpup(data);
 		break;
 	case BI_MAC_VDEPTH:
-		mac_bi_data.videodepth = *data;
+		mac_bi_data.videodepth = be32_to_cpup(data);
 		break;
 	case BI_MAC_VROW:
-		mac_bi_data.videorow = *data;
+		mac_bi_data.videorow = be32_to_cpup(data);
 		break;
 	case BI_MAC_VDIM:
-		mac_bi_data.dimensions = *data;
+		mac_bi_data.dimensions = be32_to_cpup(data);
 		break;
 	case BI_MAC_VLOGICAL:
-		mac_bi_data.videological = VIDEOMEMBASE + (*data & ~VIDEOMEMMASK);
-		mac_orig_videoaddr = *data;
+		mac_orig_videoaddr = be32_to_cpup(data);
+		mac_bi_data.videological =
+			VIDEOMEMBASE + (mac_orig_videoaddr & ~VIDEOMEMMASK);
 		break;
 	case BI_MAC_SCCBASE:
-		mac_bi_data.sccbase = *data;
+		mac_bi_data.sccbase = be32_to_cpup(data);
 		break;
 	case BI_MAC_BTIME:
-		mac_bi_data.boottime = *data;
+		mac_bi_data.boottime = be32_to_cpup(data);
 		break;
 	case BI_MAC_GMTBIAS:
-		mac_bi_data.gmtbias = *data;
+		mac_bi_data.gmtbias = be32_to_cpup(data);
 		break;
 	case BI_MAC_MEMSIZE:
-		mac_bi_data.memsize = *data;
+		mac_bi_data.memsize = be32_to_cpup(data);
 		break;
 	case BI_MAC_CPUID:
-		mac_bi_data.cpuid = *data;
+		mac_bi_data.cpuid = be32_to_cpup(data);
 		break;
 	case BI_MAC_ROMBASE:
-		mac_bi_data.rombase = *data;
+		mac_bi_data.rombase = be32_to_cpup(data);
 		break;
 	default:
 		unknown = 1;
diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c
index 3a1d47564e52..1bb3ce6634d3 100644
--- a/arch/m68k/mvme147/config.c
+++ b/arch/m68k/mvme147/config.c
@@ -27,6 +27,7 @@
 
 #include <asm/bootinfo.h>
 #include <asm/bootinfo-vme.h>
+#include <asm/byteorder.h>
 #include <asm/pgtable.h>
 #include <asm/setup.h>
 #include <asm/irq.h>
@@ -54,7 +55,8 @@ irq_handler_t tick_handler;
 
 int __init mvme147_parse_bootinfo(const struct bi_record *bi)
 {
-	if (bi->tag == BI_VME_TYPE || bi->tag == BI_VME_BRDINFO)
+	uint16_t tag = be16_to_cpu(bi->tag);
+	if (tag == BI_VME_TYPE || tag == BI_VME_BRDINFO)
 		return 0;
 	else
 		return 1;
diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c
index e05994fd048c..eab7d342757e 100644
--- a/arch/m68k/mvme16x/config.c
+++ b/arch/m68k/mvme16x/config.c
@@ -30,6 +30,7 @@
 
 #include <asm/bootinfo.h>
 #include <asm/bootinfo-vme.h>
+#include <asm/byteorder.h>
 #include <asm/pgtable.h>
 #include <asm/setup.h>
 #include <asm/irq.h>
@@ -63,7 +64,8 @@ EXPORT_SYMBOL(mvme16x_config);
 
 int __init mvme16x_parse_bootinfo(const struct bi_record *bi)
 {
-	if (bi->tag == BI_VME_TYPE || bi->tag == BI_VME_BRDINFO)
+	uint16_t tag = be16_to_cpu(bi->tag);
+	if (tag == BI_VME_TYPE || tag == BI_VME_BRDINFO)
 		return 0;
 	else
 		return 1;
@@ -88,15 +90,15 @@ static void mvme16x_get_model(char *model)
     suf[3] = '\0';
     suf[0] = suf[1] ? '-' : '\0';
 
-    sprintf(model, "Motorola MVME%x%s", p->brdno, suf);
+    sprintf(model, "Motorola MVME%x%s", be16_to_cpu(p->brdno), suf);
 }
 
 
 static void mvme16x_get_hardware_list(struct seq_file *m)
 {
-    p_bdid p = &mvme_bdid;
+    uint16_t brdno = be16_to_cpu(mvme_bdid.brdno);
 
-    if (p->brdno == 0x0162 || p->brdno == 0x0172)
+    if (brdno == 0x0162 || brdno == 0x0172)
     {
 	unsigned char rev = *(unsigned char *)MVME162_VERSION_REG;
 
@@ -286,6 +288,7 @@ void __init config_mvme16x(void)
 {
     p_bdid p = &mvme_bdid;
     char id[40];
+    uint16_t brdno = be16_to_cpu(p->brdno);
 
     mach_max_dma_address = 0xffffffff;
     mach_sched_init      = mvme16x_sched_init;
@@ -307,18 +310,18 @@ void __init config_mvme16x(void)
     }
     /* Board type is only set by newer versions of vmelilo/tftplilo */
     if (vme_brdtype == 0)
-	vme_brdtype = p->brdno;
+	vme_brdtype = brdno;
 
     mvme16x_get_model(id);
     printk ("\nBRD_ID: %s   BUG %x.%x %02x/%02x/%02x\n", id, p->rev>>4,
 					p->rev&0xf, p->yr, p->mth, p->day);
-    if (p->brdno == 0x0162 || p->brdno == 0x172)
+    if (brdno == 0x0162 || brdno == 0x172)
     {
 	unsigned char rev = *(unsigned char *)MVME162_VERSION_REG;
 
 	mvme16x_config = rev | MVME16x_CONFIG_GOT_SCCA;
 
-	printk ("MVME%x Hardware status:\n", p->brdno);
+	printk ("MVME%x Hardware status:\n", brdno);
 	printk ("    CPU Type           68%s040\n",
 			rev & MVME16x_CONFIG_GOT_FPU ? "" : "LC");
 	printk ("    CPU clock          %dMHz\n",
@@ -348,12 +351,12 @@ void __init config_mvme16x(void)
 
 static irqreturn_t mvme16x_abort_int (int irq, void *dev_id)
 {
-	p_bdid p = &mvme_bdid;
 	unsigned long *new = (unsigned long *)vectors;
 	unsigned long *old = (unsigned long *)0xffe00000;
 	volatile unsigned char uc, *ucp;
+	uint16_t brdno = be16_to_cpu(mvme_bdid.brdno);
 
-	if (p->brdno == 0x0162 || p->brdno == 0x172)
+	if (brdno == 0x0162 || brdno == 0x172)
 	{
 		ucp = (volatile unsigned char *)0xfff42043;
 		uc = *ucp | 8;
@@ -367,7 +370,7 @@ static irqreturn_t mvme16x_abort_int (int irq, void *dev_id)
 	*(new+9) = *(old+9);		/* Trace */
 	*(new+47) = *(old+47);		/* Trap #15 */
 
-	if (p->brdno == 0x0162 || p->brdno == 0x172)
+	if (brdno == 0x0162 || brdno == 0x172)
 		*(new+0x5e) = *(old+0x5e);	/* ABORT switch */
 	else
 		*(new+0x6e) = *(old+0x6e);	/* ABORT switch */
@@ -382,7 +385,7 @@ static irqreturn_t mvme16x_timer_int (int irq, void *dev_id)
 
 void mvme16x_sched_init (irq_handler_t timer_routine)
 {
-    p_bdid p = &mvme_bdid;
+    uint16_t brdno = be16_to_cpu(mvme_bdid.brdno);
     int irq;
 
     tick_handler = timer_routine;
@@ -395,7 +398,7 @@ void mvme16x_sched_init (irq_handler_t timer_routine)
 				"timer", mvme16x_timer_int))
 	panic ("Couldn't register timer int");
 
-    if (p->brdno == 0x0162 || p->brdno == 0x172)
+    if (brdno == 0x0162 || brdno == 0x172)
 	irq = MVME162_IRQ_ABORT;
     else
         irq = MVME167_IRQ_ABORT;
-- 
cgit v1.2.3


From 371001e502e8cd3543f7b9907d398a112939dff7 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Sat, 5 Oct 2013 21:14:22 +0200
Subject: m68k: Remove superfluous inclusions of <asm/bootinfo.h>

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/mac/iop.c                     | 1 -
 arch/m68k/mac/misc.c                    | 1 -
 arch/m68k/mac/oss.c                     | 1 -
 arch/m68k/mac/psc.c                     | 1 -
 arch/m68k/mac/via.c                     | 1 -
 arch/m68k/sun3x/prom.c                  | 1 -
 drivers/net/ethernet/natsemi/macsonic.c | 1 -
 drivers/video/macfb.c                   | 1 -
 drivers/video/valkyriefb.c              | 1 -
 9 files changed, 9 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/mac/iop.c b/arch/m68k/mac/iop.c
index ed0d9b0473ea..4d2adfb32a2a 100644
--- a/arch/m68k/mac/iop.c
+++ b/arch/m68k/mac/iop.c
@@ -111,7 +111,6 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 
-#include <asm/bootinfo.h>
 #include <asm/macintosh.h>
 #include <asm/macints.h>
 #include <asm/mac_iop.h>
diff --git a/arch/m68k/mac/misc.c b/arch/m68k/mac/misc.c
index 3d02a3c3a93a..707b61aea203 100644
--- a/arch/m68k/mac/misc.c
+++ b/arch/m68k/mac/misc.c
@@ -25,7 +25,6 @@
 #include <asm/mac_via.h>
 #include <asm/mac_oss.h>
 
-#include <asm/bootinfo.h>
 #include <asm/machdep.h>
 
 /* Offset between Unix time (1970-based) and Mac time (1904-based) */
diff --git a/arch/m68k/mac/oss.c b/arch/m68k/mac/oss.c
index 6c4c882c126e..54037125ebf8 100644
--- a/arch/m68k/mac/oss.c
+++ b/arch/m68k/mac/oss.c
@@ -21,7 +21,6 @@
 #include <linux/init.h>
 #include <linux/irq.h>
 
-#include <asm/bootinfo.h>
 #include <asm/macintosh.h>
 #include <asm/macints.h>
 #include <asm/mac_via.h>
diff --git a/arch/m68k/mac/psc.c b/arch/m68k/mac/psc.c
index cc9f3f097be7..835fa04511c8 100644
--- a/arch/m68k/mac/psc.c
+++ b/arch/m68k/mac/psc.c
@@ -21,7 +21,6 @@
 #include <linux/irq.h>
 
 #include <asm/traps.h>
-#include <asm/bootinfo.h>
 #include <asm/macintosh.h>
 #include <asm/macints.h>
 #include <asm/mac_psc.h>
diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c
index 5d1458bb871b..e198dec868e4 100644
--- a/arch/m68k/mac/via.c
+++ b/arch/m68k/mac/via.c
@@ -30,7 +30,6 @@
 #include <linux/module.h>
 #include <linux/irq.h>
 
-#include <asm/bootinfo.h>
 #include <asm/macintosh.h>
 #include <asm/macints.h>
 #include <asm/mac_via.h>
diff --git a/arch/m68k/sun3x/prom.c b/arch/m68k/sun3x/prom.c
index a7b7e818d627..0898c3f81508 100644
--- a/arch/m68k/sun3x/prom.c
+++ b/arch/m68k/sun3x/prom.c
@@ -10,7 +10,6 @@
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
-#include <asm/bootinfo.h>
 #include <asm/setup.h>
 #include <asm/traps.h>
 #include <asm/sun3xprom.h>
diff --git a/drivers/net/ethernet/natsemi/macsonic.c b/drivers/net/ethernet/natsemi/macsonic.c
index 346a4e025c34..04b3ec1352f1 100644
--- a/drivers/net/ethernet/natsemi/macsonic.c
+++ b/drivers/net/ethernet/natsemi/macsonic.c
@@ -52,7 +52,6 @@
 #include <linux/bitrev.h>
 #include <linux/slab.h>
 
-#include <asm/bootinfo.h>
 #include <asm/pgtable.h>
 #include <asm/io.h>
 #include <asm/hwtest.h>
diff --git a/drivers/video/macfb.c b/drivers/video/macfb.c
index 5bd2eb8d4f39..cda7587cbc86 100644
--- a/drivers/video/macfb.c
+++ b/drivers/video/macfb.c
@@ -34,7 +34,6 @@
 #include <linux/fb.h>
 
 #include <asm/setup.h>
-#include <asm/bootinfo.h>
 #include <asm/macintosh.h>
 #include <asm/io.h>
 
diff --git a/drivers/video/valkyriefb.c b/drivers/video/valkyriefb.c
index e287ebc47817..97cb9bd1d1dd 100644
--- a/drivers/video/valkyriefb.c
+++ b/drivers/video/valkyriefb.c
@@ -56,7 +56,6 @@
 #include <linux/cuda.h>
 #include <asm/io.h>
 #ifdef CONFIG_MAC
-#include <asm/bootinfo.h>
 #include <asm/macintosh.h>
 #else
 #include <asm/prom.h>
-- 
cgit v1.2.3


From 7a15dd5c4b9d916cb1734dfd79dbdd5773772abb Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Fri, 18 Oct 2013 09:05:22 +0200
Subject: m68k/atari: Call paging_init() before nf_init()

nf_init() uses virt_to_phys(), which depends on m68k_memoffset being set and
module_fixup() having been called, but this is only done in paging_init().
Hence call paging_init() before nf_init().

This went unnoticed, as virt_to_phys() is a no-op on Atari, unless you start
fiddling with the memory blocks in the bootinfo manually.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/kernel/setup_mm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c
index 0191485d7b7d..28abca421974 100644
--- a/arch/m68k/kernel/setup_mm.c
+++ b/arch/m68k/kernel/setup_mm.c
@@ -346,12 +346,12 @@ void __init setup_arch(char **cmdline_p)
 		panic("No configuration setup");
 	}
 
+	paging_init();
+
 #ifdef CONFIG_NATFEAT
 	nf_init();
 #endif
 
-	paging_init();
-
 #ifndef CONFIG_SUN3
 	for (i = 1; i < m68k_num_memory; i++)
 		free_bootmem_node(NODE_DATA(i), m68k_memory[i].addr,
-- 
cgit v1.2.3


From 017cecee99d897ae426f3948e7b970eb4f95e1f4 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Fri, 18 Oct 2013 13:10:08 +0200
Subject: m68k: Add infrastructure for machine-specific random_get_entropy()

On m68k, get_cycles() (the default implementation for random_get_entropy())
always returns zero, providing no entropy for the random driver.

Add a hook where platforms can provide their own implementation, and wire
it up in the infrastructure provided by commit
61875f30daf60305712e25b209ef41ced2635bad ("random: allow architectures to
optionally define random_get_entropy()").

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/include/asm/timex.h | 10 ++++++++++
 arch/m68k/kernel/time.c       |  4 ++++
 2 files changed, 14 insertions(+)

(limited to 'arch')

diff --git a/arch/m68k/include/asm/timex.h b/arch/m68k/include/asm/timex.h
index 6759dad954f6..efc1f4892357 100644
--- a/arch/m68k/include/asm/timex.h
+++ b/arch/m68k/include/asm/timex.h
@@ -28,4 +28,14 @@ static inline cycles_t get_cycles(void)
 	return 0;
 }
 
+extern unsigned long (*mach_random_get_entropy)(void);
+
+static inline unsigned long random_get_entropy(void)
+{
+	if (mach_random_get_entropy)
+		return mach_random_get_entropy();
+	return 0;
+}
+#define random_get_entropy	random_get_entropy
+
 #endif
diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c
index 7eb9792009f8..958f1adb9d0c 100644
--- a/arch/m68k/kernel/time.c
+++ b/arch/m68k/kernel/time.c
@@ -28,6 +28,10 @@
 #include <linux/timex.h>
 #include <linux/profile.h>
 
+
+unsigned long (*mach_random_get_entropy)(void);
+
+
 /*
  * timer_interrupt() needs to keep up the real-time clock,
  * as well as call the "xtime_update()" routine every clocktick
-- 
cgit v1.2.3


From 50190edb2a56bb9efa3e2f8ca32482dbd3e42412 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Fri, 18 Oct 2013 13:16:06 +0200
Subject: m68k/amiga: Provide mach_random_get_entropy()

Use the beam position registers, which provide at least 17 bits of data
changing at 1.79 MHz.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/amiga/config.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'arch')

diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c
index 06920e85fac9..c425fa6c2795 100644
--- a/arch/m68k/amiga/config.c
+++ b/arch/m68k/amiga/config.c
@@ -360,6 +360,14 @@ static void __init amiga_identify(void)
 #undef AMIGAHW_ANNOUNCE
 }
 
+
+static unsigned long amiga_random_get_entropy(void)
+{
+	/* VPOSR/VHPOSR provide at least 17 bits of data changing at 1.79 MHz */
+	return *(unsigned long *)&amiga_custom.vposr;
+}
+
+
     /*
      *  Setup the Amiga configuration info
      */
@@ -397,6 +405,8 @@ void __init config_amiga(void)
 	mach_heartbeat = amiga_heartbeat;
 #endif
 
+	mach_random_get_entropy = amiga_random_get_entropy;
+
 	/* Fill in the clock value (based on the 700 kHz E-Clock) */
 	amiga_colorclock = 5*amiga_eclock;	/* 3.5 MHz */
 
-- 
cgit v1.2.3


From 7972e966b032939afe63d8db22975240879dfd2a Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Mon, 25 Nov 2013 23:23:07 +0000
Subject: MIPS: Remove panic_timeout settings

Now that we have a CONFIG_PANIC_TIMEOUT=x setting, remove the
mips settings. The default is 0, which means don't reboot on
panic.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Acked-by: Shinya Kuribayashi <skuribay@pobox.com>
Signed-off-by: Jason Baron <jbaron@akamai.com>
Cc: benh@kernel.crashing.org
Cc: paulus@samba.org
Cc: mpe@ellerman.id.au
Cc: felipe.contreras@gmail.com
Cc: linux-mips@linux-mips.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/d19dc75fca343ec5d9ada75a1400f57330021976.1385418410.git.jbaron@akamai.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/mips/ar7/setup.c           | 1 -
 arch/mips/emma/markeins/setup.c | 3 ---
 arch/mips/netlogic/xlp/setup.c  | 1 -
 arch/mips/netlogic/xlr/setup.c  | 1 -
 arch/mips/sibyte/swarm/setup.c  | 2 --
 5 files changed, 8 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/ar7/setup.c b/arch/mips/ar7/setup.c
index 9a357fffcfbe..820b7a313d9b 100644
--- a/arch/mips/ar7/setup.c
+++ b/arch/mips/ar7/setup.c
@@ -92,7 +92,6 @@ void __init plat_mem_setup(void)
 	_machine_restart = ar7_machine_restart;
 	_machine_halt = ar7_machine_halt;
 	pm_power_off = ar7_machine_power_off;
-	panic_timeout = 3;
 
 	io_base = (unsigned long)ioremap(AR7_REGS_BASE, 0x10000);
 	if (!io_base)
diff --git a/arch/mips/emma/markeins/setup.c b/arch/mips/emma/markeins/setup.c
index d71005835c00..9100122e5cef 100644
--- a/arch/mips/emma/markeins/setup.c
+++ b/arch/mips/emma/markeins/setup.c
@@ -111,9 +111,6 @@ void __init plat_mem_setup(void)
 	iomem_resource.start = EMMA2RH_IO_BASE;
 	iomem_resource.end = EMMA2RH_ROM_BASE - 1;
 
-	/* Reboot on panic */
-	panic_timeout = 180;
-
 	markeins_sio_setup();
 }
 
diff --git a/arch/mips/netlogic/xlp/setup.c b/arch/mips/netlogic/xlp/setup.c
index 6d981bb337ec..54e75c77184b 100644
--- a/arch/mips/netlogic/xlp/setup.c
+++ b/arch/mips/netlogic/xlp/setup.c
@@ -92,7 +92,6 @@ static void __init xlp_init_mem_from_bars(void)
 
 void __init plat_mem_setup(void)
 {
-	panic_timeout	= 5;
 	_machine_restart = (void (*)(char *))nlm_linux_exit;
 	_machine_halt	= nlm_linux_exit;
 	pm_power_off	= nlm_linux_exit;
diff --git a/arch/mips/netlogic/xlr/setup.c b/arch/mips/netlogic/xlr/setup.c
index 214d123b79fa..921be5f77797 100644
--- a/arch/mips/netlogic/xlr/setup.c
+++ b/arch/mips/netlogic/xlr/setup.c
@@ -92,7 +92,6 @@ static void nlm_linux_exit(void)
 
 void __init plat_mem_setup(void)
 {
-	panic_timeout	= 5;
 	_machine_restart = (void (*)(char *))nlm_linux_exit;
 	_machine_halt	= nlm_linux_exit;
 	pm_power_off	= nlm_linux_exit;
diff --git a/arch/mips/sibyte/swarm/setup.c b/arch/mips/sibyte/swarm/setup.c
index 41707a245dea..3462c831d0ea 100644
--- a/arch/mips/sibyte/swarm/setup.c
+++ b/arch/mips/sibyte/swarm/setup.c
@@ -134,8 +134,6 @@ void __init plat_mem_setup(void)
 #error invalid SiByte board configuration
 #endif
 
-	panic_timeout = 5;  /* For debug.  */
-
 	board_be_handler = swarm_be_handler;
 
 	if (xicor_probe())
-- 
cgit v1.2.3


From b71d47c14fba6270c0b5a0d56639bf042017025b Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@akamai.com>
Date: Mon, 25 Nov 2013 23:23:11 +0000
Subject: powerpc: Clean up panic_timeout usage

Default CONFIG_PANIC_TIMEOUT to 180 seconds on powerpc. The
pSeries continue to set the timeout to 10 seconds at run-time.

Thus, there's a small window where we don't have the correct
value on pSeries, but if this is only run-time discoverable we
don't have a better option. In any case, if the user changes the
default setting of 180 seconds, we honor that user setting.

Signed-off-by: Jason Baron <jbaron@akamai.com>
Cc: benh@kernel.crashing.org
Cc: paulus@samba.org
Cc: ralf@linux-mips.org
Cc: mpe@ellerman.id.au
Cc: felipe.contreras@gmail.com
Cc: linuxppc-dev@lists.ozlabs.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/705bbe0f70fb20759151642ba0176a6414ec9f7a.1385418410.git.jbaron@akamai.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/powerpc/Kconfig                   | 4 ++++
 arch/powerpc/include/asm/setup.h       | 1 +
 arch/powerpc/kernel/setup_32.c         | 3 ---
 arch/powerpc/kernel/setup_64.c         | 3 ---
 arch/powerpc/platforms/pseries/setup.c | 2 +-
 5 files changed, 6 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index b44b52c0a8f0..b2be8e8cb5c7 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -147,6 +147,10 @@ config EARLY_PRINTK
 	bool
 	default y
 
+config PANIC_TIMEOUT
+	int
+	default 180
+
 config COMPAT
 	bool
 	default y if PPC64
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index 703a8412dac2..11ba86e17631 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -26,6 +26,7 @@ extern void reloc_got2(unsigned long);
 void check_for_initrd(void);
 void do_init_bootmem(void);
 void setup_panic(void);
+#define ARCH_PANIC_TIMEOUT 180
 
 #endif /* !__ASSEMBLY__ */
 
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index b903dc5cf944..2b0da27eaee4 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -296,9 +296,6 @@ void __init setup_arch(char **cmdline_p)
 	if (cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE))
 		ucache_bsize = icache_bsize = dcache_bsize;
 
-	/* reboot on panic */
-	panic_timeout = 180;
-
 	if (ppc_md.panic)
 		setup_panic();
 
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 4085aaa9478f..856dd4e99bfe 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -588,9 +588,6 @@ void __init setup_arch(char **cmdline_p)
 	dcache_bsize = ppc64_caches.dline_size;
 	icache_bsize = ppc64_caches.iline_size;
 
-	/* reboot on panic */
-	panic_timeout = 180;
-
 	if (ppc_md.panic)
 		setup_panic();
 
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index c1f190858701..6f76ae417f47 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -470,7 +470,7 @@ static long pseries_little_endian_exceptions(void)
 
 static void __init pSeries_setup_arch(void)
 {
-	panic_timeout = 10;
+	set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
 
 	/* Discover PIC type and setup ppc_md accordingly */
 	pseries_discover_pic();
-- 
cgit v1.2.3


From 4788e5b4b2338f85fa42a712a182d8afd65d7c58 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Tue, 12 Nov 2013 17:58:50 +0100
Subject: perf/x86: Add Intel RAPL PMU support

This patch adds a new uncore PMU to expose the Intel
RAPL energy consumption counters. Up to 3 counters,
each counting a particular RAPL event are exposed.

The RAPL counters are available on Intel SandyBridge,
IvyBridge, Haswell. The server skus add a 3rd counter.

The following events are available and exposed in sysfs:

  - power/energy-cores: power consumption of all cores on socket
  - power/energy-pkg: power consumption of all cores + LLc cache
  - power/energy-dram: power consumption of DRAM (servers only)

For each event both the unit (Joules) and scale (2^-32 J)
is exposed in sysfs for use by perf stat and other tools.
The files are:

	/sys/devices/power/events/energy-*.unit
	/sys/devices/power/events/energy-*.scale

The RAPL PMU is uncore by nature and is implemented such
that it only works in system-wide mode. Measuring only
one CPU per socket is sufficient. The /sys/devices/power/cpumask
file can be used by tools to figure out which CPUs to monitor
by default. For instance, on a 2-socket system, 2 CPUs
(one on each socket) will be shown.

All the counters measure in the same unit (exposed via sysfs).
The perf_events API exposes all RAPL counters as 64-bit integers
counting in unit of 1/2^32 Joules (about 0.23 nJ). User level tools
must convert the counts by multiplying them by 2^-32 to obtain
Joules. The reason for this is that the kernel avoids
doing floating point math whenever possible because it is
expensive (user floating-point state must be saved). The method
used avoids kernel floating-point usage. There is no loss of
precision. Thanks to PeterZ for suggesting this approach.

To convert the raw count in Watt:
   W = C * 2.3 / (1e10 * time)
or ldexp(C, -32).

RAPL PMU is a new standalone PMU which registers with the
perf_event core subsystem. The PMU type (attr->type) is
dynamically allocated and is available from /sys/device/power/type.

Sampling is not supported by the RAPL PMU. There is no
privilege level filtering either.

Signed-off-by: Stephane Eranian <eranian@google.com>
Reviewed-by: Maria Dimakopoulou <maria.n.dimakopoulou@gmail.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: acme@redhat.com
Cc: jolsa@redhat.com
Cc: zheng.z.yan@intel.com
Cc: bp@alien8.de
Link: http://lkml.kernel.org/r/1384275531-10892-4-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/Makefile                |   2 +-
 arch/x86/kernel/cpu/perf_event_intel_rapl.c | 591 ++++++++++++++++++++++++++++
 2 files changed, 592 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kernel/cpu/perf_event_intel_rapl.c

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 47b56a7e99cb..6359506a19ee 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -36,7 +36,7 @@ obj-$(CONFIG_CPU_SUP_AMD)		+= perf_event_amd_iommu.o
 endif
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_p6.o perf_event_knc.o perf_event_p4.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
-obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_uncore.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_uncore.o perf_event_intel_rapl.o
 endif
 
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
new file mode 100644
index 000000000000..cfcd386b5d89
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -0,0 +1,591 @@
+/*
+ * perf_event_intel_rapl.c: support Intel RAPL energy consumption counters
+ * Copyright (C) 2013 Google, Inc., Stephane Eranian
+ *
+ * Intel RAPL interface is specified in the IA-32 Manual Vol3b
+ * section 14.7.1 (September 2013)
+ *
+ * RAPL provides more controls than just reporting energy consumption
+ * however here we only expose the 3 energy consumption free running
+ * counters (pp0, pkg, dram).
+ *
+ * Each of those counters increments in a power unit defined by the
+ * RAPL_POWER_UNIT MSR. On SandyBridge, this unit is 1/(2^16) Joules
+ * but it can vary.
+ *
+ * Counter to rapl events mappings:
+ *
+ *  pp0 counter: consumption of all physical cores (power plane 0)
+ * 	  event: rapl_energy_cores
+ *    perf code: 0x1
+ *
+ *  pkg counter: consumption of the whole processor package
+ *	  event: rapl_energy_pkg
+ *    perf code: 0x2
+ *
+ * dram counter: consumption of the dram domain (servers only)
+ *	  event: rapl_energy_dram
+ *    perf code: 0x3
+ *
+ * We manage those counters as free running (read-only). They may be
+ * use simultaneously by other tools, such as turbostat.
+ *
+ * The events only support system-wide mode counting. There is no
+ * sampling support because it does not make sense and is not
+ * supported by the RAPL hardware.
+ *
+ * Because we want to avoid floating-point operations in the kernel,
+ * the events are all reported in fixed point arithmetic (32.32).
+ * Tools must adjust the counts to convert them to Watts using
+ * the duration of the measurement. Tools may use a function such as
+ * ldexp(raw_count, -32);
+ */
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+#include <asm/cpu_device_id.h>
+#include "perf_event.h"
+
+/*
+ * RAPL energy status counters
+ */
+#define RAPL_IDX_PP0_NRG_STAT	0	/* all cores */
+#define INTEL_RAPL_PP0		0x1	/* pseudo-encoding */
+#define RAPL_IDX_PKG_NRG_STAT	1	/* entire package */
+#define INTEL_RAPL_PKG		0x2	/* pseudo-encoding */
+#define RAPL_IDX_RAM_NRG_STAT	2	/* DRAM */
+#define INTEL_RAPL_RAM		0x3	/* pseudo-encoding */
+
+/* Clients have PP0, PKG */
+#define RAPL_IDX_CLN	(1<<RAPL_IDX_PP0_NRG_STAT|\
+			 1<<RAPL_IDX_PKG_NRG_STAT)
+
+/* Servers have PP0, PKG, RAM */
+#define RAPL_IDX_SRV	(1<<RAPL_IDX_PP0_NRG_STAT|\
+			 1<<RAPL_IDX_PKG_NRG_STAT|\
+			 1<<RAPL_IDX_RAM_NRG_STAT)
+
+/*
+ * event code: LSB 8 bits, passed in attr->config
+ * any other bit is reserved
+ */
+#define RAPL_EVENT_MASK	0xFFULL
+
+#define DEFINE_RAPL_FORMAT_ATTR(_var, _name, _format)		\
+static ssize_t __rapl_##_var##_show(struct kobject *kobj,	\
+				struct kobj_attribute *attr,	\
+				char *page)			\
+{								\
+	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);		\
+	return sprintf(page, _format "\n");			\
+}								\
+static struct kobj_attribute format_attr_##_var =		\
+	__ATTR(_name, 0444, __rapl_##_var##_show, NULL)
+
+#define RAPL_EVENT_DESC(_name, _config)				\
+{								\
+	.attr	= __ATTR(_name, 0444, rapl_event_show, NULL),	\
+	.config	= _config,					\
+}
+
+#define RAPL_CNTR_WIDTH 32 /* 32-bit rapl counters */
+
+struct rapl_pmu {
+	spinlock_t	 lock;
+	int		 hw_unit;  /* 1/2^hw_unit Joule */
+	int		 n_active; /* number of active events */
+	struct list_head active_list;
+	struct pmu	 *pmu; /* pointer to rapl_pmu_class */
+};
+
+static struct pmu rapl_pmu_class;
+static cpumask_t rapl_cpu_mask;
+static int rapl_cntr_mask;
+
+static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu);
+static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free);
+
+static inline u64 rapl_read_counter(struct perf_event *event)
+{
+	u64 raw;
+	rdmsrl(event->hw.event_base, raw);
+	return raw;
+}
+
+static inline u64 rapl_scale(u64 v)
+{
+	/*
+	 * scale delta to smallest unit (1/2^32)
+	 * users must then scale back: count * 1/(1e9*2^32) to get Joules
+	 * or use ldexp(count, -32).
+	 * Watts = Joules/Time delta
+	 */
+	return v << (32 - __get_cpu_var(rapl_pmu)->hw_unit);
+}
+
+static u64 rapl_event_update(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 prev_raw_count, new_raw_count;
+	s64 delta, sdelta;
+	int shift = RAPL_CNTR_WIDTH;
+
+again:
+	prev_raw_count = local64_read(&hwc->prev_count);
+	rdmsrl(event->hw.event_base, new_raw_count);
+
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+			    new_raw_count) != prev_raw_count) {
+		cpu_relax();
+		goto again;
+	}
+
+	/*
+	 * Now we have the new raw value and have updated the prev
+	 * timestamp already. We can now calculate the elapsed delta
+	 * (event-)time and add that to the generic event.
+	 *
+	 * Careful, not all hw sign-extends above the physical width
+	 * of the count.
+	 */
+	delta = (new_raw_count << shift) - (prev_raw_count << shift);
+	delta >>= shift;
+
+	sdelta = rapl_scale(delta);
+
+	local64_add(sdelta, &event->count);
+
+	return new_raw_count;
+}
+
+static void __rapl_pmu_event_start(struct rapl_pmu *pmu,
+				   struct perf_event *event)
+{
+	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+		return;
+
+	event->hw.state = 0;
+
+	list_add_tail(&event->active_entry, &pmu->active_list);
+
+	local64_set(&event->hw.prev_count, rapl_read_counter(event));
+
+	pmu->n_active++;
+}
+
+static void rapl_pmu_event_start(struct perf_event *event, int mode)
+{
+	struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu);
+	unsigned long flags;
+
+	spin_lock_irqsave(&pmu->lock, flags);
+	__rapl_pmu_event_start(pmu, event);
+	spin_unlock_irqrestore(&pmu->lock, flags);
+}
+
+static void rapl_pmu_event_stop(struct perf_event *event, int mode)
+{
+	struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pmu->lock, flags);
+
+	/* mark event as deactivated and stopped */
+	if (!(hwc->state & PERF_HES_STOPPED)) {
+		WARN_ON_ONCE(pmu->n_active <= 0);
+		pmu->n_active--;
+
+		list_del(&event->active_entry);
+
+		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+		hwc->state |= PERF_HES_STOPPED;
+	}
+
+	/* check if update of sw counter is necessary */
+	if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+		/*
+		 * Drain the remaining delta count out of a event
+		 * that we are disabling:
+		 */
+		rapl_event_update(event);
+		hwc->state |= PERF_HES_UPTODATE;
+	}
+
+	spin_unlock_irqrestore(&pmu->lock, flags);
+}
+
+static int rapl_pmu_event_add(struct perf_event *event, int mode)
+{
+	struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pmu->lock, flags);
+
+	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+	if (mode & PERF_EF_START)
+		__rapl_pmu_event_start(pmu, event);
+
+	spin_unlock_irqrestore(&pmu->lock, flags);
+
+	return 0;
+}
+
+static void rapl_pmu_event_del(struct perf_event *event, int flags)
+{
+	rapl_pmu_event_stop(event, PERF_EF_UPDATE);
+}
+
+static int rapl_pmu_event_init(struct perf_event *event)
+{
+	u64 cfg = event->attr.config & RAPL_EVENT_MASK;
+	int bit, msr, ret = 0;
+
+	/* only look at RAPL events */
+	if (event->attr.type != rapl_pmu_class.type)
+		return -ENOENT;
+
+	/* check only supported bits are set */
+	if (event->attr.config & ~RAPL_EVENT_MASK)
+		return -EINVAL;
+
+	/*
+	 * check event is known (determines counter)
+	 */
+	switch (cfg) {
+	case INTEL_RAPL_PP0:
+		bit = RAPL_IDX_PP0_NRG_STAT;
+		msr = MSR_PP0_ENERGY_STATUS;
+		break;
+	case INTEL_RAPL_PKG:
+		bit = RAPL_IDX_PKG_NRG_STAT;
+		msr = MSR_PKG_ENERGY_STATUS;
+		break;
+	case INTEL_RAPL_RAM:
+		bit = RAPL_IDX_RAM_NRG_STAT;
+		msr = MSR_DRAM_ENERGY_STATUS;
+		break;
+	default:
+		return -EINVAL;
+	}
+	/* check event supported */
+	if (!(rapl_cntr_mask & (1 << bit)))
+		return -EINVAL;
+
+	/* unsupported modes and filters */
+	if (event->attr.exclude_user   ||
+	    event->attr.exclude_kernel ||
+	    event->attr.exclude_hv     ||
+	    event->attr.exclude_idle   ||
+	    event->attr.exclude_host   ||
+	    event->attr.exclude_guest  ||
+	    event->attr.sample_period) /* no sampling */
+		return -EINVAL;
+
+	/* must be done before validate_group */
+	event->hw.event_base = msr;
+	event->hw.config = cfg;
+	event->hw.idx = bit;
+
+	return ret;
+}
+
+static void rapl_pmu_event_read(struct perf_event *event)
+{
+	rapl_event_update(event);
+}
+
+static ssize_t rapl_get_attr_cpumask(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &rapl_cpu_mask);
+
+	buf[n++] = '\n';
+	buf[n] = '\0';
+	return n;
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL);
+
+static struct attribute *rapl_pmu_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static struct attribute_group rapl_pmu_attr_group = {
+	.attrs = rapl_pmu_attrs,
+};
+
+EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
+EVENT_ATTR_STR(energy-pkg  , rapl_pkg, "event=0x02");
+EVENT_ATTR_STR(energy-ram  , rapl_ram, "event=0x03");
+
+EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
+EVENT_ATTR_STR(energy-pkg.unit  , rapl_pkg_unit, "Joules");
+EVENT_ATTR_STR(energy-ram.unit  , rapl_ram_unit, "Joules");
+
+/*
+ * we compute in 0.23 nJ increments regardless of MSR
+ */
+EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10");
+EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890625e-10");
+EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10");
+
+static struct attribute *rapl_events_srv_attr[] = {
+	EVENT_PTR(rapl_cores),
+	EVENT_PTR(rapl_pkg),
+	EVENT_PTR(rapl_ram),
+
+	EVENT_PTR(rapl_cores_unit),
+	EVENT_PTR(rapl_pkg_unit),
+	EVENT_PTR(rapl_ram_unit),
+
+	EVENT_PTR(rapl_cores_scale),
+	EVENT_PTR(rapl_pkg_scale),
+	EVENT_PTR(rapl_ram_scale),
+	NULL,
+};
+
+static struct attribute *rapl_events_cln_attr[] = {
+	EVENT_PTR(rapl_cores),
+	EVENT_PTR(rapl_pkg),
+
+	EVENT_PTR(rapl_cores_unit),
+	EVENT_PTR(rapl_pkg_unit),
+
+	EVENT_PTR(rapl_cores_scale),
+	EVENT_PTR(rapl_pkg_scale),
+	NULL,
+};
+
+static struct attribute_group rapl_pmu_events_group = {
+	.name = "events",
+	.attrs = NULL, /* patched at runtime */
+};
+
+DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7");
+static struct attribute *rapl_formats_attr[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute_group rapl_pmu_format_group = {
+	.name = "format",
+	.attrs = rapl_formats_attr,
+};
+
+const struct attribute_group *rapl_attr_groups[] = {
+	&rapl_pmu_attr_group,
+	&rapl_pmu_format_group,
+	&rapl_pmu_events_group,
+	NULL,
+};
+
+static struct pmu rapl_pmu_class = {
+	.attr_groups	= rapl_attr_groups,
+	.task_ctx_nr	= perf_invalid_context, /* system-wide only */
+	.event_init	= rapl_pmu_event_init,
+	.add		= rapl_pmu_event_add, /* must have */
+	.del		= rapl_pmu_event_del, /* must have */
+	.start		= rapl_pmu_event_start,
+	.stop		= rapl_pmu_event_stop,
+	.read		= rapl_pmu_event_read,
+};
+
+static void rapl_cpu_exit(int cpu)
+{
+	struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
+	int i, phys_id = topology_physical_package_id(cpu);
+	int target = -1;
+
+	/* find a new cpu on same package */
+	for_each_online_cpu(i) {
+		if (i == cpu)
+			continue;
+		if (phys_id == topology_physical_package_id(i)) {
+			target = i;
+			break;
+		}
+	}
+	/*
+	 * clear cpu from cpumask
+	 * if was set in cpumask and still some cpu on package,
+	 * then move to new cpu
+	 */
+	if (cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask) && target >= 0)
+		cpumask_set_cpu(target, &rapl_cpu_mask);
+
+	WARN_ON(cpumask_empty(&rapl_cpu_mask));
+	/*
+	 * migrate events and context to new cpu
+	 */
+	if (target >= 0)
+		perf_pmu_migrate_context(pmu->pmu, cpu, target);
+}
+
+static void rapl_cpu_init(int cpu)
+{
+	int i, phys_id = topology_physical_package_id(cpu);
+
+	/* check if phys_is is already covered */
+	for_each_cpu(i, &rapl_cpu_mask) {
+		if (phys_id == topology_physical_package_id(i))
+			return;
+	}
+	/* was not found, so add it */
+	cpumask_set_cpu(cpu, &rapl_cpu_mask);
+}
+
+static int rapl_cpu_prepare(int cpu)
+{
+	struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
+	int phys_id = topology_physical_package_id(cpu);
+
+	if (pmu)
+		return 0;
+
+	if (phys_id < 0)
+		return -1;
+
+	pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
+	if (!pmu)
+		return -1;
+
+	spin_lock_init(&pmu->lock);
+
+	INIT_LIST_HEAD(&pmu->active_list);
+
+	/*
+	 * grab power unit as: 1/2^unit Joules
+	 *
+	 * we cache in local PMU instance
+	 */
+	rdmsrl(MSR_RAPL_POWER_UNIT, pmu->hw_unit);
+	pmu->hw_unit = (pmu->hw_unit >> 8) & 0x1FULL;
+	pmu->pmu = &rapl_pmu_class;
+
+	/* set RAPL pmu for this cpu for now */
+	per_cpu(rapl_pmu, cpu) = pmu;
+	per_cpu(rapl_pmu_to_free, cpu) = NULL;
+
+	return 0;
+}
+
+static void rapl_cpu_kfree(int cpu)
+{
+	struct rapl_pmu *pmu = per_cpu(rapl_pmu_to_free, cpu);
+
+	kfree(pmu);
+
+	per_cpu(rapl_pmu_to_free, cpu) = NULL;
+}
+
+static int rapl_cpu_dying(int cpu)
+{
+	struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
+
+	if (!pmu)
+		return 0;
+
+	per_cpu(rapl_pmu, cpu) = NULL;
+
+	per_cpu(rapl_pmu_to_free, cpu) = pmu;
+
+	return 0;
+}
+
+static int rapl_cpu_notifier(struct notifier_block *self,
+			     unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (long)hcpu;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_UP_PREPARE:
+		rapl_cpu_prepare(cpu);
+		break;
+	case CPU_STARTING:
+		rapl_cpu_init(cpu);
+		break;
+	case CPU_UP_CANCELED:
+	case CPU_DYING:
+		rapl_cpu_dying(cpu);
+		break;
+	case CPU_ONLINE:
+	case CPU_DEAD:
+		rapl_cpu_kfree(cpu);
+		break;
+	case CPU_DOWN_PREPARE:
+		rapl_cpu_exit(cpu);
+		break;
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static const struct x86_cpu_id rapl_cpu_match[] = {
+	[0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
+	[1] = {},
+};
+
+static int __init rapl_pmu_init(void)
+{
+	struct rapl_pmu *pmu;
+	int cpu, ret;
+
+	/*
+	 * check for Intel processor family 6
+	 */
+	if (!x86_match_cpu(rapl_cpu_match))
+		return 0;
+
+	/* check supported CPU */
+	switch (boot_cpu_data.x86_model) {
+	case 42: /* Sandy Bridge */
+	case 58: /* Ivy Bridge */
+	case 60: /* Haswell */
+		rapl_cntr_mask = RAPL_IDX_CLN;
+		rapl_pmu_events_group.attrs = rapl_events_cln_attr;
+		break;
+	case 45: /* Sandy Bridge-EP */
+	case 62: /* IvyTown */
+		rapl_cntr_mask = RAPL_IDX_SRV;
+		rapl_pmu_events_group.attrs = rapl_events_srv_attr;
+		break;
+
+	default:
+		/* unsupported */
+		return 0;
+	}
+	get_online_cpus();
+
+	for_each_online_cpu(cpu) {
+		rapl_cpu_prepare(cpu);
+		rapl_cpu_init(cpu);
+	}
+
+	perf_cpu_notifier(rapl_cpu_notifier);
+
+	ret = perf_pmu_register(&rapl_pmu_class, "power", -1);
+	if (WARN_ON(ret)) {
+		pr_info("RAPL PMU detected, registration failed (%d), RAPL PMU disabled\n", ret);
+		put_online_cpus();
+		return -1;
+	}
+
+	pmu = __get_cpu_var(rapl_pmu);
+
+	pr_info("RAPL PMU detected, hw unit 2^-%d Joules,"
+		" API unit is 2^-32 Joules,"
+		" %d fixed counters\n",
+		pmu->hw_unit,
+		hweight32(rapl_cntr_mask));
+
+	put_online_cpus();
+
+	return 0;
+}
+device_initcall(rapl_pmu_init);
-- 
cgit v1.2.3


From 65661f96d3b32f4b28fef26d21be81d7e173b965 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Tue, 12 Nov 2013 17:58:51 +0100
Subject: perf/x86: Add RAPL hrtimer support

The RAPL PMU counters do not interrupt on overflow.
Therefore, the kernel needs to poll the counters
to avoid missing an overflow. This patch adds
the hrtimer code to do this.

The timer interval is calculated at boot time
based on the power unit used by the HW.

There is one hrtimer per-cpu to handle the case
of multiple simultaneous use across cores on
the same package + hotplug CPU.

Thanks to Maria Dimakopoulou for her contributions
to this patch especially on the math aspects.

Signed-off-by: Stephane Eranian <eranian@google.com>
Reviewed-by: Maria Dimakopoulou <maria.n.dimakopoulou@gmail.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
[ Applied 32-bit build fix. ]
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: acme@redhat.com
Cc: jolsa@redhat.com
Cc: zheng.z.yan@intel.com
Cc: bp@alien8.de
Cc: maria.n.dimakopoulou@gmail.com
Link: http://lkml.kernel.org/r/1384275531-10892-5-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_rapl.c | 74 ++++++++++++++++++++++++++++-
 1 file changed, 72 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index cfcd386b5d89..bf8e4a736d48 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -96,6 +96,8 @@ struct rapl_pmu {
 	int		 n_active; /* number of active events */
 	struct list_head active_list;
 	struct pmu	 *pmu; /* pointer to rapl_pmu_class */
+	ktime_t		 timer_interval; /* in ktime_t unit */
+	struct hrtimer   hrtimer;
 };
 
 static struct pmu rapl_pmu_class;
@@ -158,6 +160,48 @@ again:
 	return new_raw_count;
 }
 
+static void rapl_start_hrtimer(struct rapl_pmu *pmu)
+{
+	__hrtimer_start_range_ns(&pmu->hrtimer,
+			pmu->timer_interval, 0,
+			HRTIMER_MODE_REL_PINNED, 0);
+}
+
+static void rapl_stop_hrtimer(struct rapl_pmu *pmu)
+{
+	hrtimer_cancel(&pmu->hrtimer);
+}
+
+static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer)
+{
+	struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu);
+	struct perf_event *event;
+	unsigned long flags;
+
+	if (!pmu->n_active)
+		return HRTIMER_NORESTART;
+
+	spin_lock_irqsave(&pmu->lock, flags);
+
+	list_for_each_entry(event, &pmu->active_list, active_entry) {
+		rapl_event_update(event);
+	}
+
+	spin_unlock_irqrestore(&pmu->lock, flags);
+
+	hrtimer_forward_now(hrtimer, pmu->timer_interval);
+
+	return HRTIMER_RESTART;
+}
+
+static void rapl_hrtimer_init(struct rapl_pmu *pmu)
+{
+	struct hrtimer *hr = &pmu->hrtimer;
+
+	hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hr->function = rapl_hrtimer_handle;
+}
+
 static void __rapl_pmu_event_start(struct rapl_pmu *pmu,
 				   struct perf_event *event)
 {
@@ -171,6 +215,8 @@ static void __rapl_pmu_event_start(struct rapl_pmu *pmu,
 	local64_set(&event->hw.prev_count, rapl_read_counter(event));
 
 	pmu->n_active++;
+	if (pmu->n_active == 1)
+		rapl_start_hrtimer(pmu);
 }
 
 static void rapl_pmu_event_start(struct perf_event *event, int mode)
@@ -195,6 +241,8 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode)
 	if (!(hwc->state & PERF_HES_STOPPED)) {
 		WARN_ON_ONCE(pmu->n_active <= 0);
 		pmu->n_active--;
+		if (pmu->n_active == 0)
+			rapl_stop_hrtimer(pmu);
 
 		list_del(&event->active_entry);
 
@@ -423,6 +471,9 @@ static void rapl_cpu_exit(int cpu)
 	 */
 	if (target >= 0)
 		perf_pmu_migrate_context(pmu->pmu, cpu, target);
+
+	/* cancel overflow polling timer for CPU */
+	rapl_stop_hrtimer(pmu);
 }
 
 static void rapl_cpu_init(int cpu)
@@ -442,6 +493,7 @@ static int rapl_cpu_prepare(int cpu)
 {
 	struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
 	int phys_id = topology_physical_package_id(cpu);
+	u64 ms;
 
 	if (pmu)
 		return 0;
@@ -466,6 +518,22 @@ static int rapl_cpu_prepare(int cpu)
 	pmu->hw_unit = (pmu->hw_unit >> 8) & 0x1FULL;
 	pmu->pmu = &rapl_pmu_class;
 
+	/*
+	 * use reference of 200W for scaling the timeout
+	 * to avoid missing counter overflows.
+	 * 200W = 200 Joules/sec
+	 * divide interval by 2 to avoid lockstep (2 * 100)
+	 * if hw unit is 32, then we use 2 ms 1/200/2
+	 */
+	if (pmu->hw_unit < 32)
+		ms = (1000 / (2 * 100)) * (1ULL << (32 - pmu->hw_unit - 1));
+	else
+		ms = 2;
+
+	pmu->timer_interval = ms_to_ktime(ms);
+
+	rapl_hrtimer_init(pmu);
+
 	/* set RAPL pmu for this cpu for now */
 	per_cpu(rapl_pmu, cpu) = pmu;
 	per_cpu(rapl_pmu_to_free, cpu) = NULL;
@@ -580,9 +648,11 @@ static int __init rapl_pmu_init(void)
 
 	pr_info("RAPL PMU detected, hw unit 2^-%d Joules,"
 		" API unit is 2^-32 Joules,"
-		" %d fixed counters\n",
+		" %d fixed counters"
+		" %llu ms ovfl timer\n",
 		pmu->hw_unit,
-		hweight32(rapl_cntr_mask));
+		hweight32(rapl_cntr_mask),
+		ktime_to_ms(pmu->timer_interval));
 
 	put_online_cpus();
 
-- 
cgit v1.2.3


From 5a25f7bb5aa1b2f876d9df81c44b516e015022f8 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Mon, 25 Nov 2013 11:14:50 +0000
Subject: metag: smp: don't set irq regs in do_IPI()

Since commit f6b30d32d242 (metag: kick: add missing irq_enter/exit to
kick_handler()), the main kick_handler() function deals with setting and
restoring the irq registers pointer. Therefore do_IPI() which is called
indirectly from kick_handler() doesn't need to do that itself any
longer. Therefore remove that code and do_IPI()'s pt_regs argument.

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-metag@vger.kernel.org
Cc: Thomas Gleixner <tglx@linutronix.de>
---
 arch/metag/kernel/smp.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c
index 7c0113142981..177ffb408c5a 100644
--- a/arch/metag/kernel/smp.c
+++ b/arch/metag/kernel/smp.c
@@ -517,11 +517,10 @@ static DEFINE_SPINLOCK(stop_lock);
  *
  *  Bit 0 - Inter-processor function call
  */
-static int do_IPI(struct pt_regs *regs)
+static int do_IPI(void)
 {
 	unsigned int cpu = smp_processor_id();
 	struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
-	struct pt_regs *old_regs = set_irq_regs(regs);
 	unsigned long msgs, nextmsg;
 	int handled = 0;
 
@@ -557,8 +556,6 @@ static int do_IPI(struct pt_regs *regs)
 		}
 	}
 
-	set_irq_regs(old_regs);
-
 	return handled;
 }
 
@@ -624,7 +621,7 @@ static void kick_raise_softirq(cpumask_t callmap, unsigned int irq)
 static TBIRES ipi_handler(TBIRES State, int SigNum, int Triggers,
 		   int Inst, PTBI pTBI, int *handled)
 {
-	*handled = do_IPI((struct pt_regs *)State.Sig.pCtx);
+	*handled = do_IPI();
 
 	return State;
 }
-- 
cgit v1.2.3


From 853d9b18f1e861d37e9b271742329f8c1176eabe Mon Sep 17 00:00:00 2001
From: Levente Kurusa <levex@linux.com>
Date: Fri, 29 Nov 2013 21:28:48 +0100
Subject: x86, mce: Call put_device on device_register failure

This patch adds a call to put_device() when the device_register() call
has failed. This is required so that the last reference to the device is
given up.

Signed-off-by: Levente Kurusa <levex@linux.com>
Link: http://lkml.kernel.org/r/5298F900.9000208@linux.com
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/kernel/cpu/mcheck/mce.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index b3218cdee95f..a389c1d859ec 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -2272,8 +2272,10 @@ static int mce_device_create(unsigned int cpu)
 	dev->release = &mce_device_release;
 
 	err = device_register(dev);
-	if (err)
+	if (err) {
+		put_device(dev);
 		return err;
+	}
 
 	for (i = 0; mce_device_attrs[i]; i++) {
 		err = device_create_file(dev, mce_device_attrs[i]);
-- 
cgit v1.2.3


From 1de425c7b271220df5c1bd28e45ca97ca3f4dae8 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <tkhai@yandex.ru>
Date: Tue, 3 Dec 2013 20:12:37 +0400
Subject: sparc64: Fix build regression

This patch fixes build error which was introduced by commit

812cb83a56a908729c453a7db3fb2c262119bc9d (Implement HAVE_CONTEXT_TRACKING).

[*]https://lkml.org/lkml/2013/11/23/103

Signed-off-by: Kirill Tkhai <tkhai@yandex.ru>
CC: David Miller <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/kgdb_64.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/sparc/kernel/kgdb_64.c b/arch/sparc/kernel/kgdb_64.c
index 60b19f50c80a..b45fe3fb4d2c 100644
--- a/arch/sparc/kernel/kgdb_64.c
+++ b/arch/sparc/kernel/kgdb_64.c
@@ -6,6 +6,7 @@
 #include <linux/kgdb.h>
 #include <linux/kdebug.h>
 #include <linux/ftrace.h>
+#include <linux/context_tracking.h>
 
 #include <asm/cacheflush.h>
 #include <asm/kdebug.h>
-- 
cgit v1.2.3


From 1d61cf121d7d9085145553294307e81c25586288 Mon Sep 17 00:00:00 2001
From: Jiang Liu <liuj97@gmail.com>
Date: Thu, 5 Dec 2013 00:13:01 +0800
Subject: smp, metag: kill SMP single function call interrupt

Commit 9a46ad6d6df3 "smp: make smp_call_function_many() use logic
similar to smp_call_function_single()" has unified the way to handle
single and multiple cross-CPU function calls. Now only one interrupt is
needed for architecture specific code to support generic SMP function
call interfaces, so kill the redundant single function call interrupt.

Signed-off-by: Jiang Liu <liuj97@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Shaohua Li <shli@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Jiri Kosina <trivial@kernel.org>
Signed-off-by: James Hogan <james.hogan@imgtec.com>
---
 arch/metag/include/asm/smp.h | 2 --
 arch/metag/kernel/smp.c      | 6 +-----
 2 files changed, 1 insertion(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/metag/include/asm/smp.h b/arch/metag/include/asm/smp.h
index e0373f81a117..1d7e770f7a54 100644
--- a/arch/metag/include/asm/smp.h
+++ b/arch/metag/include/asm/smp.h
@@ -7,13 +7,11 @@
 
 enum ipi_msg_type {
 	IPI_CALL_FUNC,
-	IPI_CALL_FUNC_SINGLE,
 	IPI_RESCHEDULE,
 };
 
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
 
 asmlinkage void secondary_start_kernel(void);
 
diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c
index 177ffb408c5a..f74008b6da07 100644
--- a/arch/metag/kernel/smp.c
+++ b/arch/metag/kernel/smp.c
@@ -491,7 +491,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 
 void arch_send_call_function_single_ipi(int cpu)
 {
-	send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
+	send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC);
 }
 
 void show_ipi_list(struct seq_file *p)
@@ -545,10 +545,6 @@ static int do_IPI(void)
 			generic_smp_call_function_interrupt();
 			break;
 
-		case IPI_CALL_FUNC_SINGLE:
-			generic_smp_call_function_single_interrupt();
-			break;
-
 		default:
 			pr_crit("CPU%u: Unknown IPI message 0x%lx\n",
 				cpu, nextmsg);
-- 
cgit v1.2.3


From 7d5f5fa276efbbe45f0ed270b3f4e4a2816398c2 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Tue, 20 Aug 2013 22:51:47 +0200
Subject: m68k: Add kexec support

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/Kconfig                  |  17 ++++
 arch/m68k/include/asm/kexec.h      |  29 +++++++
 arch/m68k/kernel/Makefile          |   2 +
 arch/m68k/kernel/asm-offsets.c     |   3 +
 arch/m68k/kernel/machine_kexec.c   |  58 ++++++++++++++
 arch/m68k/kernel/relocate_kernel.S | 159 +++++++++++++++++++++++++++++++++++++
 include/uapi/linux/kexec.h         |   1 +
 7 files changed, 269 insertions(+)
 create mode 100644 arch/m68k/include/asm/kexec.h
 create mode 100644 arch/m68k/kernel/machine_kexec.c
 create mode 100644 arch/m68k/kernel/relocate_kernel.S

(limited to 'arch')

diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 75f25a8e3001..178dffa9de80 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -87,6 +87,23 @@ config MMU_SUN3
 	bool
 	depends on MMU && !MMU_MOTOROLA && !MMU_COLDFIRE
 
+config KEXEC
+	bool "kexec system call"
+	depends on M68KCLASSIC
+	help
+	  kexec is a system call that implements the ability to shutdown your
+	  current kernel, and to start another kernel.  It is like a reboot
+	  but it is independent of the system firmware.   And like a reboot
+	  you can start any kernel with it, not just Linux.
+
+	  The name comes from the similarity to the exec system call.
+
+	  It is an ongoing process to be certain the hardware in a machine
+	  is properly shutdown, so do not be surprised if this code does not
+	  initially work for you.  As of this writing the exact hardware
+	  interface is strongly in flux, so no good recommendation can be
+	  made.
+
 menu "Platform setup"
 
 source arch/m68k/Kconfig.cpu
diff --git a/arch/m68k/include/asm/kexec.h b/arch/m68k/include/asm/kexec.h
new file mode 100644
index 000000000000..3df97abac147
--- /dev/null
+++ b/arch/m68k/include/asm/kexec.h
@@ -0,0 +1,29 @@
+#ifndef _ASM_M68K_KEXEC_H
+#define _ASM_M68K_KEXEC_H
+
+#ifdef CONFIG_KEXEC
+
+/* Maximum physical address we can use pages from */
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+/* Maximum address we can reach in physical address mode */
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+/* Maximum address we can use for the control code buffer */
+#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
+
+#define KEXEC_CONTROL_PAGE_SIZE	4096
+
+#define KEXEC_ARCH KEXEC_ARCH_68K
+
+#ifndef __ASSEMBLY__
+
+static inline void crash_setup_regs(struct pt_regs *newregs,
+				    struct pt_regs *oldregs)
+{
+	/* Dummy implementation for now */
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* CONFIG_KEXEC */
+
+#endif /* _ASM_M68K_KEXEC_H */
diff --git a/arch/m68k/kernel/Makefile b/arch/m68k/kernel/Makefile
index 655347d80780..7ee5f00a9abb 100644
--- a/arch/m68k/kernel/Makefile
+++ b/arch/m68k/kernel/Makefile
@@ -22,3 +22,5 @@ obj-$(CONFIG_PCI) += pcibios.o
 
 obj-$(CONFIG_HAS_DMA)	+= dma.o
 
+obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
+
diff --git a/arch/m68k/kernel/asm-offsets.c b/arch/m68k/kernel/asm-offsets.c
index 8b7b22846366..3a386341aa6e 100644
--- a/arch/m68k/kernel/asm-offsets.c
+++ b/arch/m68k/kernel/asm-offsets.c
@@ -98,6 +98,9 @@ int main(void)
 	DEFINE(CIABBASE, &ciab);
 	DEFINE(C_PRA, offsetof(struct CIA, pra));
 	DEFINE(ZTWOBASE, zTwoBase);
+
+	/* enum m68k_fixup_type */
+	DEFINE(M68K_FIXUP_MEMOFFSET, m68k_fixup_memoffset);
 #endif
 
 	return 0;
diff --git a/arch/m68k/kernel/machine_kexec.c b/arch/m68k/kernel/machine_kexec.c
new file mode 100644
index 000000000000..d4affc917d9d
--- /dev/null
+++ b/arch/m68k/kernel/machine_kexec.c
@@ -0,0 +1,58 @@
+/*
+ * machine_kexec.c - handle transition of Linux booting another kernel
+ */
+#include <linux/compiler.h>
+#include <linux/kexec.h>
+#include <linux/mm.h>
+#include <linux/delay.h>
+
+#include <asm/cacheflush.h>
+#include <asm/page.h>
+#include <asm/setup.h>
+
+extern const unsigned char relocate_new_kernel[];
+extern const size_t relocate_new_kernel_size;
+
+int machine_kexec_prepare(struct kimage *kimage)
+{
+	return 0;
+}
+
+void machine_kexec_cleanup(struct kimage *kimage)
+{
+}
+
+void machine_shutdown(void)
+{
+}
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+}
+
+typedef void (*relocate_kernel_t)(unsigned long ptr,
+				  unsigned long start,
+				  unsigned long cpu_mmu_flags) __noreturn;
+
+void machine_kexec(struct kimage *image)
+{
+	void *reboot_code_buffer;
+	unsigned long cpu_mmu_flags;
+
+	reboot_code_buffer = page_address(image->control_code_page);
+
+	memcpy(reboot_code_buffer, relocate_new_kernel,
+	       relocate_new_kernel_size);
+
+	/*
+	 * we do not want to be bothered.
+	 */
+	local_irq_disable();
+
+	pr_info("Will call new kernel at 0x%08lx. Bye...\n", image->start);
+	__flush_cache_all();
+	cpu_mmu_flags = m68k_cputype | m68k_mmutype << 8;
+	((relocate_kernel_t) reboot_code_buffer)(image->head & PAGE_MASK,
+						 image->start,
+						 cpu_mmu_flags);
+}
diff --git a/arch/m68k/kernel/relocate_kernel.S b/arch/m68k/kernel/relocate_kernel.S
new file mode 100644
index 000000000000..3e09a89067ad
--- /dev/null
+++ b/arch/m68k/kernel/relocate_kernel.S
@@ -0,0 +1,159 @@
+#include <linux/linkage.h>
+
+#include <asm/asm-offsets.h>
+#include <asm/page.h>
+#include <asm/setup.h>
+
+
+#define MMU_BASE	8		/* MMU flags base in cpu_mmu_flags */
+
+.text
+
+ENTRY(relocate_new_kernel)
+	movel %sp@(4),%a0		/* a0 = ptr */
+	movel %sp@(8),%a1		/* a1 = start */
+	movel %sp@(12),%d1		/* d1 = cpu_mmu_flags */
+	movew #PAGE_MASK,%d2		/* d2 = PAGE_MASK */
+
+	/* Disable MMU */
+
+	btst #MMU_BASE + MMUB_68851,%d1
+	jeq 3f
+
+1:	/* 68851 or 68030 */
+
+	lea %pc@(.Lcopy),%a4
+2:	addl #0x00000000,%a4		/* virt_to_phys() */
+
+	.section ".m68k_fixup","aw"
+	.long M68K_FIXUP_MEMOFFSET, 2b+2
+	.previous
+
+	.chip 68030
+	pmove %tc,%d0			/* Disable MMU */
+	bclr #7,%d0
+	pmove %d0,%tc
+	jmp %a4@			/* Jump to physical .Lcopy */
+	.chip 68k
+
+3:
+	btst #MMU_BASE + MMUB_68030,%d1
+	jne 1b
+
+	btst #MMU_BASE + MMUB_68040,%d1
+	jeq 6f
+
+4:	/* 68040 or 68060 */
+
+	lea %pc@(.Lcont040),%a4
+5:	addl #0x00000000,%a4		/* virt_to_phys() */
+
+	.section ".m68k_fixup","aw"
+	.long M68K_FIXUP_MEMOFFSET, 5b+2
+	.previous
+
+	movel %a4,%d0
+	andl #0xff000000,%d0
+	orw #0xe020,%d0			/* Map 16 MiB, enable, cacheable */
+	.chip 68040
+	movec %d0,%itt0
+	movec %d0,%dtt0
+	.chip 68k
+	jmp %a4@			/* Jump to physical .Lcont040 */
+
+.Lcont040:
+	moveq #0,%d0
+	.chip 68040
+	movec %d0,%tc			/* Disable MMU */
+	movec %d0,%itt0
+	movec %d0,%itt1
+	movec %d0,%dtt0
+	movec %d0,%dtt1
+	.chip 68k
+	jra .Lcopy
+
+6:
+	btst #MMU_BASE + MMUB_68060,%d1
+	jne 4b
+
+.Lcopy:
+	movel %a0@+,%d0			/* d0 = entry = *ptr */
+	jeq .Lflush
+
+	btst #2,%d0			/* entry & IND_DONE? */
+	jne .Lflush
+
+	btst #1,%d0			/* entry & IND_INDIRECTION? */
+	jeq 1f
+	andw %d2,%d0
+	movel %d0,%a0			/* ptr = entry & PAGE_MASK */
+	jra .Lcopy
+
+1:
+	btst #0,%d0			/* entry & IND_DESTINATION? */
+	jeq 2f
+	andw %d2,%d0
+	movel %d0,%a2			/* a2 = dst = entry & PAGE_MASK */
+	jra .Lcopy
+
+2:
+	btst #3,%d0			/* entry & IND_SOURCE? */
+	jeq .Lcopy
+
+	andw %d2,%d0
+	movel %d0,%a3			/* a3 = src = entry & PAGE_MASK */
+	movew #PAGE_SIZE/32 - 1,%d0	/* d0 = PAGE_SIZE/32 - 1 */
+3:
+	movel %a3@+,%a2@+		/* *dst++ = *src++ */
+	movel %a3@+,%a2@+		/* *dst++ = *src++ */
+	movel %a3@+,%a2@+		/* *dst++ = *src++ */
+	movel %a3@+,%a2@+		/* *dst++ = *src++ */
+	movel %a3@+,%a2@+		/* *dst++ = *src++ */
+	movel %a3@+,%a2@+		/* *dst++ = *src++ */
+	movel %a3@+,%a2@+		/* *dst++ = *src++ */
+	movel %a3@+,%a2@+		/* *dst++ = *src++ */
+	dbf %d0, 3b
+	jra .Lcopy
+
+.Lflush:
+	/* Flush all caches */
+
+	btst #CPUB_68020,%d1
+	jeq 2f
+
+1:	/* 68020 or 68030 */
+	.chip 68030
+	movec %cacr,%d0
+	orw #0x808,%d0
+	movec %d0,%cacr
+	.chip 68k
+	jra .Lreincarnate
+
+2:
+	btst #CPUB_68030,%d1
+	jne 1b
+
+	btst #CPUB_68040,%d1
+	jeq 4f
+
+3:	/* 68040 or 68060 */
+	.chip 68040
+	nop
+	cpusha %bc
+	nop
+	cinva %bc
+	nop
+	.chip 68k
+	jra .Lreincarnate
+
+4:
+	btst #CPUB_68060,%d1
+	jne 3b
+
+.Lreincarnate:
+	jmp %a1@
+
+relocate_new_kernel_end:
+
+ENTRY(relocate_new_kernel_size)
+	.long relocate_new_kernel_end - relocate_new_kernel
diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
index 104838f65bc1..d6629d49a243 100644
--- a/include/uapi/linux/kexec.h
+++ b/include/uapi/linux/kexec.h
@@ -18,6 +18,7 @@
  */
 #define KEXEC_ARCH_DEFAULT ( 0 << 16)
 #define KEXEC_ARCH_386     ( 3 << 16)
+#define KEXEC_ARCH_68K     ( 4 << 16)
 #define KEXEC_ARCH_X86_64  (62 << 16)
 #define KEXEC_ARCH_PPC     (20 << 16)
 #define KEXEC_ARCH_PPC64   (21 << 16)
-- 
cgit v1.2.3


From 7bc1e4d8d506462c7d40118196f79a709f3fecfd Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 21 Aug 2013 22:36:32 +0200
Subject: m68k: Add support to export bootinfo in procfs

Add optional support to export the bootinfo used to boot the kernel in a
"bootinfo" file in procfs.  This is useful with kexec.

This is based on the similar feature for ATAGS on ARM.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/Kconfig                |  7 ++++
 arch/m68k/include/asm/bootinfo.h | 12 ++++++
 arch/m68k/kernel/Makefile        |  1 +
 arch/m68k/kernel/bootinfo_proc.c | 80 ++++++++++++++++++++++++++++++++++++++++
 arch/m68k/kernel/setup_mm.c      |  2 +
 5 files changed, 102 insertions(+)
 create mode 100644 arch/m68k/kernel/bootinfo_proc.c

(limited to 'arch')

diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 178dffa9de80..dbdd2231c75d 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -104,6 +104,13 @@ config KEXEC
 	  interface is strongly in flux, so no good recommendation can be
 	  made.
 
+config BOOTINFO_PROC
+	bool "Export bootinfo in procfs"
+	depends on KEXEC && M68KCLASSIC
+	help
+	  Say Y to export the bootinfo used to boot the kernel in a
+	  "bootinfo" file in procfs.  This is useful with kexec.
+
 menu "Platform setup"
 
 source arch/m68k/Kconfig.cpu
diff --git a/arch/m68k/include/asm/bootinfo.h b/arch/m68k/include/asm/bootinfo.h
index 9edc31893fb8..8e213267f8e7 100644
--- a/arch/m68k/include/asm/bootinfo.h
+++ b/arch/m68k/include/asm/bootinfo.h
@@ -13,4 +13,16 @@
 
 #include <uapi/asm/bootinfo.h>
 
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_BOOTINFO_PROC
+extern void save_bootinfo(const struct bi_record *bi);
+#else
+static inline void save_bootinfo(const struct bi_record *bi) {}
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+
 #endif /* _M68K_BOOTINFO_H */
diff --git a/arch/m68k/kernel/Makefile b/arch/m68k/kernel/Makefile
index 7ee5f00a9abb..2d5d9be16273 100644
--- a/arch/m68k/kernel/Makefile
+++ b/arch/m68k/kernel/Makefile
@@ -23,4 +23,5 @@ obj-$(CONFIG_PCI) += pcibios.o
 obj-$(CONFIG_HAS_DMA)	+= dma.o
 
 obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
+obj-$(CONFIG_BOOTINFO_PROC)	+= bootinfo_proc.o
 
diff --git a/arch/m68k/kernel/bootinfo_proc.c b/arch/m68k/kernel/bootinfo_proc.c
new file mode 100644
index 000000000000..7ee853e1432b
--- /dev/null
+++ b/arch/m68k/kernel/bootinfo_proc.c
@@ -0,0 +1,80 @@
+/*
+ * Based on arch/arm/kernel/atags_proc.c
+ */
+
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/printk.h>
+#include <linux/proc_fs.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include <asm/bootinfo.h>
+#include <asm/byteorder.h>
+
+
+static char bootinfo_tmp[1536] __initdata;
+
+static void *bootinfo_copy;
+static size_t bootinfo_size;
+
+static ssize_t bootinfo_read(struct file *file, char __user *buf,
+			  size_t count, loff_t *ppos)
+{
+	return simple_read_from_buffer(buf, count, ppos, bootinfo_copy,
+				       bootinfo_size);
+}
+
+static const struct file_operations bootinfo_fops = {
+	.read = bootinfo_read,
+	.llseek = default_llseek,
+};
+
+void __init save_bootinfo(const struct bi_record *bi)
+{
+	const void *start = bi;
+	size_t size = sizeof(bi->tag);
+
+	while (be16_to_cpu(bi->tag) != BI_LAST) {
+		uint16_t n = be16_to_cpu(bi->size);
+		size += n;
+		bi = (struct bi_record *)((unsigned long)bi + n);
+	}
+
+	if (size > sizeof(bootinfo_tmp)) {
+		pr_err("Cannot save %zu bytes of bootinfo\n", size);
+		return;
+	}
+
+	pr_info("Saving %zu bytes of bootinfo\n", size);
+	memcpy(bootinfo_tmp, start, size);
+	bootinfo_size = size;
+}
+
+static int __init init_bootinfo_procfs(void)
+{
+	/*
+	 * This cannot go into save_bootinfo() because kmalloc and proc don't
+	 * work yet when it is called.
+	 */
+	struct proc_dir_entry *pde;
+
+	if (!bootinfo_size)
+		return -EINVAL;
+
+	bootinfo_copy = kmalloc(bootinfo_size, GFP_KERNEL);
+	if (!bootinfo_copy)
+		return -ENOMEM;
+
+	memcpy(bootinfo_copy, bootinfo_tmp, bootinfo_size);
+
+	pde = proc_create_data("bootinfo", 0400, NULL, &bootinfo_fops, NULL);
+	if (!pde) {
+		kfree(bootinfo_copy);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+arch_initcall(init_bootinfo_procfs);
diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c
index 28abca421974..5b8ec4d5f8e8 100644
--- a/arch/m68k/kernel/setup_mm.c
+++ b/arch/m68k/kernel/setup_mm.c
@@ -146,6 +146,8 @@ static void __init m68k_parse_bootinfo(const struct bi_record *record)
 {
 	uint16_t tag;
 
+	save_bootinfo(record);
+
 	while ((tag = be16_to_cpu(record->tag)) != BI_LAST) {
 		int unknown = 0;
 		const void *data = record->data;
-- 
cgit v1.2.3


From c6188d0f5756e3b20c216483cc7982d097c7b9de Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Thu, 28 Nov 2013 13:57:24 +0100
Subject: m68k/defconfig: Use ext4 for ext2/ext3 file systems

This reduces the kernel image size by ca. 160 KiB.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/configs/amiga_defconfig    | 4 ----
 arch/m68k/configs/apollo_defconfig   | 4 ----
 arch/m68k/configs/atari_defconfig    | 4 ----
 arch/m68k/configs/bvme6000_defconfig | 4 ----
 arch/m68k/configs/hp300_defconfig    | 4 ----
 arch/m68k/configs/mac_defconfig      | 4 ----
 arch/m68k/configs/multi_defconfig    | 4 ----
 arch/m68k/configs/mvme147_defconfig  | 4 ----
 arch/m68k/configs/mvme16x_defconfig  | 4 ----
 arch/m68k/configs/q40_defconfig      | 4 ----
 arch/m68k/configs/sun3_defconfig     | 4 ----
 arch/m68k/configs/sun3x_defconfig    | 4 ----
 12 files changed, 48 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 04432b7ce157..1f87c0789b12 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -345,10 +345,6 @@ CONFIG_HEARTBEAT=y
 CONFIG_PROC_HARDWARE=y
 CONFIG_AMIGA_BUILTIN_SERIAL=y
 CONFIG_SERIAL_CONSOLE=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-# CONFIG_EXT3_FS_XATTR is not set
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
 CONFIG_JFS_FS=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index a018fd7ed7bc..ea5b64794364 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -302,10 +302,6 @@ CONFIG_RTC_DRV_GENERIC=m
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_HEARTBEAT=y
 CONFIG_PROC_HARDWARE=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-# CONFIG_EXT3_FS_XATTR is not set
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
 CONFIG_JFS_FS=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index fa33639b4107..0c3d280187f2 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -320,10 +320,6 @@ CONFIG_NFBLOCK=y
 CONFIG_NFCON=y
 CONFIG_NFETH=y
 CONFIG_ATARI_DSP56K=m
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-# CONFIG_EXT3_FS_XATTR is not set
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
 CONFIG_JFS_FS=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index ca310d586689..9b6b515c5a23 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -294,10 +294,6 @@ CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_GENERIC=m
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_PROC_HARDWARE=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-# CONFIG_EXT3_FS_XATTR is not set
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
 CONFIG_JFS_FS=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index aa98958c230e..efffa687135f 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -304,10 +304,6 @@ CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_GENERIC=m
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_PROC_HARDWARE=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-# CONFIG_EXT3_FS_XATTR is not set
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
 CONFIG_JFS_FS=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index ac2c153f8a4c..73a8565746dc 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -327,10 +327,6 @@ CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_GENERIC=m
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_PROC_HARDWARE=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-# CONFIG_EXT3_FS_XATTR is not set
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
 CONFIG_JFS_FS=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 61cdb096ca43..19c7939f700f 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -405,10 +405,6 @@ CONFIG_NFETH=y
 CONFIG_ATARI_DSP56K=m
 CONFIG_AMIGA_BUILTIN_SERIAL=y
 CONFIG_SERIAL_CONSOLE=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-# CONFIG_EXT3_FS_XATTR is not set
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
 CONFIG_JFS_FS=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index f89613f9c82b..81fab00de63a 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -294,10 +294,6 @@ CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_GENERIC=m
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_PROC_HARDWARE=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-# CONFIG_EXT3_FS_XATTR is not set
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
 CONFIG_JFS_FS=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index d086acd6aa26..df0783770611 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -294,10 +294,6 @@ CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_GENERIC=m
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_PROC_HARDWARE=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-# CONFIG_EXT3_FS_XATTR is not set
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
 CONFIG_JFS_FS=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 3662899c0f88..f64c8963fa5d 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -318,10 +318,6 @@ CONFIG_RTC_DRV_GENERIC=m
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_HEARTBEAT=y
 CONFIG_PROC_HARDWARE=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-# CONFIG_EXT3_FS_XATTR is not set
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
 CONFIG_JFS_FS=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index bb87f17ef477..99bcc60f82a3 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -296,10 +296,6 @@ CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_GENERIC=m
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_PROC_HARDWARE=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-# CONFIG_EXT3_FS_XATTR is not set
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
 CONFIG_JFS_FS=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index b75a0b177b24..2023a91500f3 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -296,10 +296,6 @@ CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_GENERIC=m
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_PROC_HARDWARE=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-# CONFIG_EXT3_FS_XATTR is not set
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
 CONFIG_JFS_FS=m
-- 
cgit v1.2.3


From 8a09cec25fa19a3a7e9f21b4186fecc69085a60f Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Sun, 1 Dec 2013 11:14:51 +0100
Subject: m68k/amiga,atari: Fix specifying multiple debug= parameters

Since commit d6713b4091a99fa2af2fabdcd2f3fb97f32ecf2e ("m68k: early
parameter support"), the user can specify multiple debug consoles using the
"debug=" kernel command line parameter.
However, as there's only a single struct console object, which is reused,
it would actually register the same console object multiple times, causing
the following warning:

WARNING: CPU: 0 PID: 0 at kernel/printk/printk.c:2233 register_console+0x36/
console 'debug0' already registered

Make sure to register the console object only once, to avoid the warning.

Note that still only one console (the one corresponding to the last
"debug=" parameter) will be active at the same time, as the .write() method
of the already registered console object is overwritten by a subsequent
"debug=" parameter.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/amiga/config.c | 19 ++++++++++++++-----
 arch/m68k/atari/debug.c  |  5 ++++-
 2 files changed, 18 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c
index c425fa6c2795..9625b7132227 100644
--- a/arch/m68k/amiga/config.c
+++ b/arch/m68k/amiga/config.c
@@ -620,6 +620,8 @@ static void amiga_mem_console_write(struct console *co, const char *s,
 
 static int __init amiga_savekmsg_setup(char *arg)
 {
+	bool registered;
+
 	if (!MACH_IS_AMIGA || strcmp(arg, "mem"))
 		return 0;
 
@@ -636,8 +638,10 @@ static int __init amiga_savekmsg_setup(char *arg)
 	savekmsg->magicptr = ZTWO_PADDR(savekmsg);
 	savekmsg->size = 0;
 
+	registered = !!amiga_console_driver.write;
 	amiga_console_driver.write = amiga_mem_console_write;
-	register_console(&amiga_console_driver);
+	if (!registered)
+		register_console(&amiga_console_driver);
 	return 0;
 }
 
@@ -719,11 +723,16 @@ void amiga_serial_gets(struct console *co, char *s, int len)
 
 static int __init amiga_debug_setup(char *arg)
 {
-	if (MACH_IS_AMIGA && !strcmp(arg, "ser")) {
-		/* no initialization required (?) */
-		amiga_console_driver.write = amiga_serial_console_write;
+	bool registered;
+
+	if (!MACH_IS_AMIGA || strcmp(arg, "ser"))
+		return 0;
+
+	/* no initialization required (?) */
+	registered = !!amiga_console_driver.write;
+	amiga_console_driver.write = amiga_serial_console_write;
+	if (!registered)
 		register_console(&amiga_console_driver);
-	}
 	return 0;
 }
 
diff --git a/arch/m68k/atari/debug.c b/arch/m68k/atari/debug.c
index a547ba9683d1..03cb5e08d7cf 100644
--- a/arch/m68k/atari/debug.c
+++ b/arch/m68k/atari/debug.c
@@ -287,6 +287,8 @@ static void __init atari_init_midi_port(int cflag)
 
 static int __init atari_debug_setup(char *arg)
 {
+	bool registered;
+
 	if (!MACH_IS_ATARI)
 		return 0;
 
@@ -294,6 +296,7 @@ static int __init atari_debug_setup(char *arg)
 		/* defaults to ser2 for a Falcon and ser1 otherwise */
 		arg = MACH_IS_FALCON ? "ser2" : "ser1";
 
+	registered = !!atari_console_driver.write;
 	if (!strcmp(arg, "ser1")) {
 		/* ST-MFP Modem1 serial port */
 		atari_init_mfp_port(B9600|CS8);
@@ -317,7 +320,7 @@ static int __init atari_debug_setup(char *arg)
 		sound_ym.wd_data = sound_ym.rd_data_reg_sel | 0x20; /* strobe H */
 		atari_console_driver.write = atari_par_console_write;
 	}
-	if (atari_console_driver.write)
+	if (atari_console_driver.write && !registered)
 		register_console(&atari_console_driver);
 
 	return 0;
-- 
cgit v1.2.3


From 27d632d27c4f4f0e9201b38f4ae70bd3050de843 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Mon, 2 Dec 2013 11:08:13 +0100
Subject: m68k/atari: Hide RTC_PORT() macro from rtc-cmos

Rename RTC_PORT() to ATARI_RTC_PORT(), as the rtc-cmos RTC driver uses the
presence of this macro to enable support for the second NVRAM bank, which
Atari doesn't have ("Unable to handle kernel access at virtual address
00ff8965").

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/include/asm/mc146818rtc.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/include/asm/mc146818rtc.h b/arch/m68k/include/asm/mc146818rtc.h
index 9f70a01f73dc..05b43bf5cdf3 100644
--- a/arch/m68k/include/asm/mc146818rtc.h
+++ b/arch/m68k/include/asm/mc146818rtc.h
@@ -10,16 +10,16 @@
 
 #include <asm/atarihw.h>
 
-#define RTC_PORT(x)	(TT_RTC_BAS + 2*(x))
+#define ATARI_RTC_PORT(x)	(TT_RTC_BAS + 2*(x))
 #define RTC_ALWAYS_BCD	0
 
 #define CMOS_READ(addr) ({ \
-atari_outb_p((addr),RTC_PORT(0)); \
-atari_inb_p(RTC_PORT(1)); \
+atari_outb_p((addr), ATARI_RTC_PORT(0)); \
+atari_inb_p(ATARI_RTC_PORT(1)); \
 })
 #define CMOS_WRITE(val, addr) ({ \
-atari_outb_p((addr),RTC_PORT(0)); \
-atari_outb_p((val),RTC_PORT(1)); \
+atari_outb_p((addr), ATARI_RTC_PORT(0)); \
+atari_outb_p((val), ATARI_RTC_PORT(1)); \
 })
 #endif /* CONFIG_ATARI */
 
-- 
cgit v1.2.3


From d52eefb47d4eb6fe40d4c92bc711dd34a8ce1747 Mon Sep 17 00:00:00 2001
From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Date: Thu, 5 Dec 2013 13:47:27 -0500
Subject: ia64/xen: Remove Xen support for ia64

ia64 has not been supported by Xen since 4.2 so it's time to drop
Xen/ia64 from Linux as well.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/Kconfig                         |   12 -
 arch/ia64/Makefile                        |    2 -
 arch/ia64/configs/xen_domu_defconfig      |  199 -----
 arch/ia64/include/asm/acpi.h              |    2 -
 arch/ia64/include/asm/machvec.h           |    2 -
 arch/ia64/include/asm/machvec_xen.h       |   22 -
 arch/ia64/include/asm/meminit.h           |    1 -
 arch/ia64/include/asm/paravirt.h          |    1 -
 arch/ia64/include/asm/pvclock-abi.h       |    2 +-
 arch/ia64/include/asm/sync_bitops.h       |   51 --
 arch/ia64/include/asm/xen/events.h        |   41 --
 arch/ia64/include/asm/xen/hypercall.h     |  265 -------
 arch/ia64/include/asm/xen/hypervisor.h    |   61 --
 arch/ia64/include/asm/xen/inst.h          |  486 ------------
 arch/ia64/include/asm/xen/interface.h     |  363 ---------
 arch/ia64/include/asm/xen/irq.h           |   44 --
 arch/ia64/include/asm/xen/minstate.h      |  143 ----
 arch/ia64/include/asm/xen/page-coherent.h |   38 -
 arch/ia64/include/asm/xen/page.h          |   65 --
 arch/ia64/include/asm/xen/patchlist.h     |   38 -
 arch/ia64/include/asm/xen/privop.h        |  135 ----
 arch/ia64/include/asm/xen/xcom_hcall.h    |   51 --
 arch/ia64/include/asm/xen/xencomm.h       |   42 --
 arch/ia64/include/uapi/asm/break.h        |    9 -
 arch/ia64/kernel/acpi.c                   |    3 -
 arch/ia64/kernel/asm-offsets.c            |   32 -
 arch/ia64/kernel/head.S                   |    3 -
 arch/ia64/kernel/nr-irqs.c                |    4 -
 arch/ia64/kernel/paravirt_inst.h          |    3 -
 arch/ia64/kernel/paravirt_patchlist.h     |    4 -
 arch/ia64/kernel/vmlinux.lds.S            |    6 -
 arch/ia64/xen/Kconfig                     |   25 -
 arch/ia64/xen/Makefile                    |   37 -
 arch/ia64/xen/gate-data.S                 |    3 -
 arch/ia64/xen/grant-table.c               |   94 ---
 arch/ia64/xen/hypercall.S                 |   88 ---
 arch/ia64/xen/hypervisor.c                |   97 ---
 arch/ia64/xen/irq_xen.c                   |  443 -----------
 arch/ia64/xen/irq_xen.h                   |   34 -
 arch/ia64/xen/machvec.c                   |    4 -
 arch/ia64/xen/suspend.c                   |   59 --
 arch/ia64/xen/time.c                      |  257 -------
 arch/ia64/xen/time.h                      |   24 -
 arch/ia64/xen/xcom_hcall.c                |  441 -----------
 arch/ia64/xen/xen_pv_ops.c                | 1141 -----------------------------
 arch/ia64/xen/xencomm.c                   |  106 ---
 arch/ia64/xen/xenivt.S                    |   52 --
 arch/ia64/xen/xensetup.S                  |   80 --
 include/xen/interface/callback.h          |    2 +-
 include/xen/interface/io/protocols.h      |    3 -
 50 files changed, 2 insertions(+), 5118 deletions(-)
 delete mode 100644 arch/ia64/configs/xen_domu_defconfig
 delete mode 100644 arch/ia64/include/asm/machvec_xen.h
 delete mode 100644 arch/ia64/include/asm/sync_bitops.h
 delete mode 100644 arch/ia64/include/asm/xen/events.h
 delete mode 100644 arch/ia64/include/asm/xen/hypercall.h
 delete mode 100644 arch/ia64/include/asm/xen/hypervisor.h
 delete mode 100644 arch/ia64/include/asm/xen/inst.h
 delete mode 100644 arch/ia64/include/asm/xen/interface.h
 delete mode 100644 arch/ia64/include/asm/xen/irq.h
 delete mode 100644 arch/ia64/include/asm/xen/minstate.h
 delete mode 100644 arch/ia64/include/asm/xen/page-coherent.h
 delete mode 100644 arch/ia64/include/asm/xen/page.h
 delete mode 100644 arch/ia64/include/asm/xen/patchlist.h
 delete mode 100644 arch/ia64/include/asm/xen/privop.h
 delete mode 100644 arch/ia64/include/asm/xen/xcom_hcall.h
 delete mode 100644 arch/ia64/include/asm/xen/xencomm.h
 delete mode 100644 arch/ia64/xen/Kconfig
 delete mode 100644 arch/ia64/xen/Makefile
 delete mode 100644 arch/ia64/xen/gate-data.S
 delete mode 100644 arch/ia64/xen/grant-table.c
 delete mode 100644 arch/ia64/xen/hypercall.S
 delete mode 100644 arch/ia64/xen/hypervisor.c
 delete mode 100644 arch/ia64/xen/irq_xen.c
 delete mode 100644 arch/ia64/xen/irq_xen.h
 delete mode 100644 arch/ia64/xen/machvec.c
 delete mode 100644 arch/ia64/xen/suspend.c
 delete mode 100644 arch/ia64/xen/time.c
 delete mode 100644 arch/ia64/xen/time.h
 delete mode 100644 arch/ia64/xen/xcom_hcall.c
 delete mode 100644 arch/ia64/xen/xen_pv_ops.c
 delete mode 100644 arch/ia64/xen/xencomm.c
 delete mode 100644 arch/ia64/xen/xenivt.S
 delete mode 100644 arch/ia64/xen/xensetup.S

(limited to 'arch')

diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 4e4119b0e691..a8c3a11dc5ab 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -147,9 +147,6 @@ config PARAVIRT
 	  over full virtualization.  However, when run without a hypervisor
 	  the kernel is theoretically slower and slightly larger.
 
-
-source "arch/ia64/xen/Kconfig"
-
 endif
 
 choice
@@ -175,7 +172,6 @@ config IA64_GENERIC
 	  SGI-SN2		For SGI Altix systems
 	  SGI-UV		For SGI UV systems
 	  Ski-simulator		For the HP simulator <http://www.hpl.hp.com/research/linux/ski/>
-	  Xen-domU		For xen domU system
 
 	  If you don't know what to do, choose "generic".
 
@@ -231,14 +227,6 @@ config IA64_HP_SIM
 	bool "Ski-simulator"
 	select SWIOTLB
 
-config IA64_XEN_GUEST
-	bool "Xen guest"
-	select SWIOTLB
-	depends on XEN
-	help
-	  Build a kernel that runs on Xen guest domain. At this moment only
-	  16KB page size in supported.
-
 endchoice
 
 choice
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index be7bfa12b705..f37238f45bcd 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -51,11 +51,9 @@ core-$(CONFIG_IA64_DIG_VTD) 	+= arch/ia64/dig/
 core-$(CONFIG_IA64_GENERIC) 	+= arch/ia64/dig/
 core-$(CONFIG_IA64_HP_ZX1)	+= arch/ia64/dig/
 core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/
-core-$(CONFIG_IA64_XEN_GUEST)	+= arch/ia64/dig/
 core-$(CONFIG_IA64_SGI_SN2)	+= arch/ia64/sn/
 core-$(CONFIG_IA64_SGI_UV)	+= arch/ia64/uv/
 core-$(CONFIG_KVM) 		+= arch/ia64/kvm/
-core-$(CONFIG_XEN)		+= arch/ia64/xen/
 
 drivers-$(CONFIG_PCI)		+= arch/ia64/pci/
 drivers-$(CONFIG_IA64_HP_SIM)	+= arch/ia64/hp/sim/
diff --git a/arch/ia64/configs/xen_domu_defconfig b/arch/ia64/configs/xen_domu_defconfig
deleted file mode 100644
index b025acfde5c1..000000000000
--- a/arch/ia64/configs/xen_domu_defconfig
+++ /dev/null
@@ -1,199 +0,0 @@
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=20
-CONFIG_SYSFS_DEPRECATED_V2=y
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_KALLSYMS_ALL=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-CONFIG_MODULE_SRCVERSION_ALL=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_PARAVIRT_GUEST=y
-CONFIG_IA64_XEN_GUEST=y
-CONFIG_MCKINLEY=y
-CONFIG_IA64_CYCLONE=y
-CONFIG_SMP=y
-CONFIG_NR_CPUS=16
-CONFIG_HOTPLUG_CPU=y
-CONFIG_PERMIT_BSP_REMOVE=y
-CONFIG_FORCE_CPEI_RETARGET=y
-CONFIG_IA64_MCA_RECOVERY=y
-CONFIG_PERFMON=y
-CONFIG_IA64_PALINFO=y
-CONFIG_KEXEC=y
-CONFIG_EFI_VARS=y
-CONFIG_BINFMT_MISC=m
-CONFIG_ACPI_PROCFS=y
-CONFIG_ACPI_BUTTON=m
-CONFIG_ACPI_FAN=m
-CONFIG_ACPI_PROCESSOR=m
-CONFIG_ACPI_CONTAINER=m
-CONFIG_HOTPLUG_PCI=y
-CONFIG_HOTPLUG_PCI_ACPI=m
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_ARPD=y
-CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
-# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_BLK_DEV_LOOP=m
-CONFIG_BLK_DEV_CRYPTOLOOP=m
-CONFIG_BLK_DEV_NBD=m
-CONFIG_BLK_DEV_RAM=y
-CONFIG_IDE=y
-CONFIG_BLK_DEV_IDECD=y
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_CMD64X=y
-CONFIG_BLK_DEV_PIIX=y
-CONFIG_SCSI=y
-CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_ST=m
-CONFIG_BLK_DEV_SR=m
-CONFIG_CHR_DEV_SG=m
-CONFIG_SCSI_SYM53C8XX_2=y
-CONFIG_SCSI_QLOGIC_1280=y
-CONFIG_MD=y
-CONFIG_BLK_DEV_MD=m
-CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
-CONFIG_MD_RAID1=m
-CONFIG_MD_MULTIPATH=m
-CONFIG_BLK_DEV_DM=m
-CONFIG_DM_CRYPT=m
-CONFIG_DM_SNAPSHOT=m
-CONFIG_DM_MIRROR=m
-CONFIG_DM_ZERO=m
-CONFIG_FUSION=y
-CONFIG_FUSION_SPI=y
-CONFIG_FUSION_FC=y
-CONFIG_FUSION_CTL=y
-CONFIG_NETDEVICES=y
-CONFIG_DUMMY=m
-CONFIG_NET_ETHERNET=y
-CONFIG_NET_TULIP=y
-CONFIG_TULIP=m
-CONFIG_NET_PCI=y
-CONFIG_NET_VENDOR_INTEL=y
-CONFIG_E100=m
-CONFIG_E1000=y
-CONFIG_TIGON3=y
-CONFIG_NETCONSOLE=y
-# CONFIG_SERIO_SERPORT is not set
-CONFIG_GAMEPORT=m
-CONFIG_SERIAL_NONSTANDARD=y
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_NR_UARTS=6
-CONFIG_SERIAL_8250_EXTENDED=y
-CONFIG_SERIAL_8250_SHARE_IRQ=y
-# CONFIG_HW_RANDOM is not set
-CONFIG_EFI_RTC=y
-CONFIG_RAW_DRIVER=m
-CONFIG_HPET=y
-CONFIG_AGP=m
-CONFIG_DRM=m
-CONFIG_DRM_TDFX=m
-CONFIG_DRM_R128=m
-CONFIG_DRM_RADEON=m
-CONFIG_DRM_MGA=m
-CONFIG_DRM_SIS=m
-CONFIG_HID_GYRATION=y
-CONFIG_HID_NTRIG=y
-CONFIG_HID_PANTHERLORD=y
-CONFIG_HID_PETALYNX=y
-CONFIG_HID_SAMSUNG=y
-CONFIG_HID_SONY=y
-CONFIG_HID_SUNPLUS=y
-CONFIG_HID_TOPSEED=y
-CONFIG_USB=y
-CONFIG_USB_DEVICEFS=y
-CONFIG_USB_EHCI_HCD=m
-CONFIG_USB_OHCI_HCD=m
-CONFIG_USB_UHCI_HCD=y
-CONFIG_USB_STORAGE=m
-CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XATTR=y
-CONFIG_EXT2_FS_POSIX_ACL=y
-CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
-CONFIG_REISERFS_FS=y
-CONFIG_REISERFS_FS_XATTR=y
-CONFIG_REISERFS_FS_POSIX_ACL=y
-CONFIG_REISERFS_FS_SECURITY=y
-CONFIG_XFS_FS=y
-CONFIG_AUTOFS_FS=y
-CONFIG_AUTOFS4_FS=y
-CONFIG_ISO9660_FS=m
-CONFIG_JOLIET=y
-CONFIG_UDF_FS=m
-CONFIG_VFAT_FS=y
-CONFIG_NTFS_FS=m
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_HUGETLBFS=y
-CONFIG_NFS_FS=m
-CONFIG_NFS_V3=y
-CONFIG_NFS_V4=y
-CONFIG_NFSD=m
-CONFIG_NFSD_V4=y
-CONFIG_SMB_FS=m
-CONFIG_SMB_NLS_DEFAULT=y
-CONFIG_CIFS=m
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_SGI_PARTITION=y
-CONFIG_EFI_PARTITION=y
-CONFIG_NLS_CODEPAGE_437=y
-CONFIG_NLS_CODEPAGE_737=m
-CONFIG_NLS_CODEPAGE_775=m
-CONFIG_NLS_CODEPAGE_850=m
-CONFIG_NLS_CODEPAGE_852=m
-CONFIG_NLS_CODEPAGE_855=m
-CONFIG_NLS_CODEPAGE_857=m
-CONFIG_NLS_CODEPAGE_860=m
-CONFIG_NLS_CODEPAGE_861=m
-CONFIG_NLS_CODEPAGE_862=m
-CONFIG_NLS_CODEPAGE_863=m
-CONFIG_NLS_CODEPAGE_864=m
-CONFIG_NLS_CODEPAGE_865=m
-CONFIG_NLS_CODEPAGE_866=m
-CONFIG_NLS_CODEPAGE_869=m
-CONFIG_NLS_CODEPAGE_936=m
-CONFIG_NLS_CODEPAGE_950=m
-CONFIG_NLS_CODEPAGE_932=m
-CONFIG_NLS_CODEPAGE_949=m
-CONFIG_NLS_CODEPAGE_874=m
-CONFIG_NLS_ISO8859_8=m
-CONFIG_NLS_CODEPAGE_1250=m
-CONFIG_NLS_CODEPAGE_1251=m
-CONFIG_NLS_ISO8859_1=y
-CONFIG_NLS_ISO8859_2=m
-CONFIG_NLS_ISO8859_3=m
-CONFIG_NLS_ISO8859_4=m
-CONFIG_NLS_ISO8859_5=m
-CONFIG_NLS_ISO8859_6=m
-CONFIG_NLS_ISO8859_7=m
-CONFIG_NLS_ISO8859_9=m
-CONFIG_NLS_ISO8859_13=m
-CONFIG_NLS_ISO8859_14=m
-CONFIG_NLS_ISO8859_15=m
-CONFIG_NLS_KOI8_R=m
-CONFIG_NLS_KOI8_U=m
-CONFIG_NLS_UTF8=m
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DEBUG_MUTEXES=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_IA64_GRANULE_16MB=y
-CONFIG_CRYPTO_ECB=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_MD5=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h
index faa1bf0da815..d651102a4d45 100644
--- a/arch/ia64/include/asm/acpi.h
+++ b/arch/ia64/include/asm/acpi.h
@@ -111,8 +111,6 @@ static inline const char *acpi_get_sysname (void)
 	return "uv";
 # elif defined (CONFIG_IA64_DIG)
 	return "dig";
-# elif defined (CONFIG_IA64_XEN_GUEST)
-	return "xen";
 # elif defined(CONFIG_IA64_DIG_VTD)
 	return "dig_vtd";
 # else
diff --git a/arch/ia64/include/asm/machvec.h b/arch/ia64/include/asm/machvec.h
index 2d1ad4b11a85..9c39bdfc2da8 100644
--- a/arch/ia64/include/asm/machvec.h
+++ b/arch/ia64/include/asm/machvec.h
@@ -113,8 +113,6 @@ extern void machvec_tlb_migrate_finish (struct mm_struct *);
 #  include <asm/machvec_sn2.h>
 # elif defined (CONFIG_IA64_SGI_UV)
 #  include <asm/machvec_uv.h>
-# elif defined (CONFIG_IA64_XEN_GUEST)
-#  include <asm/machvec_xen.h>
 # elif defined (CONFIG_IA64_GENERIC)
 
 # ifdef MACHVEC_PLATFORM_HEADER
diff --git a/arch/ia64/include/asm/machvec_xen.h b/arch/ia64/include/asm/machvec_xen.h
deleted file mode 100644
index 8b8bd0eb3923..000000000000
--- a/arch/ia64/include/asm/machvec_xen.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef _ASM_IA64_MACHVEC_XEN_h
-#define _ASM_IA64_MACHVEC_XEN_h
-
-extern ia64_mv_setup_t			dig_setup;
-extern ia64_mv_cpu_init_t		xen_cpu_init;
-extern ia64_mv_irq_init_t		xen_irq_init;
-extern ia64_mv_send_ipi_t		xen_platform_send_ipi;
-
-/*
- * This stuff has dual use!
- *
- * For a generic kernel, the macros are used to initialize the
- * platform's machvec structure.  When compiling a non-generic kernel,
- * the macros are used directly.
- */
-#define ia64_platform_name			"xen"
-#define platform_setup				dig_setup
-#define platform_cpu_init			xen_cpu_init
-#define platform_irq_init			xen_irq_init
-#define platform_send_ipi			xen_platform_send_ipi
-
-#endif /* _ASM_IA64_MACHVEC_XEN_h */
diff --git a/arch/ia64/include/asm/meminit.h b/arch/ia64/include/asm/meminit.h
index 61c7b1750b16..092f1c91b36c 100644
--- a/arch/ia64/include/asm/meminit.h
+++ b/arch/ia64/include/asm/meminit.h
@@ -18,7 +18,6 @@
  * 	- crash dumping code reserved region
  * 	- Kernel memory map built from EFI memory map
  * 	- ELF core header
- *	- xen start info if CONFIG_XEN
  *
  * More could be added if necessary
  */
diff --git a/arch/ia64/include/asm/paravirt.h b/arch/ia64/include/asm/paravirt.h
index b149b88ea795..b53518a98026 100644
--- a/arch/ia64/include/asm/paravirt.h
+++ b/arch/ia64/include/asm/paravirt.h
@@ -75,7 +75,6 @@ void *paravirt_get_gate_section(void);
 #ifdef CONFIG_PARAVIRT_GUEST
 
 #define PARAVIRT_HYPERVISOR_TYPE_DEFAULT	0
-#define PARAVIRT_HYPERVISOR_TYPE_XEN		1
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/ia64/include/asm/pvclock-abi.h b/arch/ia64/include/asm/pvclock-abi.h
index 44ef9ef8f5b3..42b233bedeb5 100644
--- a/arch/ia64/include/asm/pvclock-abi.h
+++ b/arch/ia64/include/asm/pvclock-abi.h
@@ -11,7 +11,7 @@
 /*
  * These structs MUST NOT be changed.
  * They are the ABI between hypervisor and guest OS.
- * Both Xen and KVM are using this.
+ * KVM is using this.
  *
  * pvclock_vcpu_time_info holds the system time and the tsc timestamp
  * of the last update. So the guest can use the tsc delta to get a
diff --git a/arch/ia64/include/asm/sync_bitops.h b/arch/ia64/include/asm/sync_bitops.h
deleted file mode 100644
index 593c12eeb270..000000000000
--- a/arch/ia64/include/asm/sync_bitops.h
+++ /dev/null
@@ -1,51 +0,0 @@
-#ifndef _ASM_IA64_SYNC_BITOPS_H
-#define _ASM_IA64_SYNC_BITOPS_H
-
-/*
- * Copyright (C) 2008 Isaku Yamahata <yamahata at valinux co jp>
- *
- * Based on synch_bitops.h which Dan Magenhaimer wrote.
- *
- * bit operations which provide guaranteed strong synchronisation
- * when communicating with Xen or other guest OSes running on other CPUs.
- */
-
-static inline void sync_set_bit(int nr, volatile void *addr)
-{
-	set_bit(nr, addr);
-}
-
-static inline void sync_clear_bit(int nr, volatile void *addr)
-{
-	clear_bit(nr, addr);
-}
-
-static inline void sync_change_bit(int nr, volatile void *addr)
-{
-	change_bit(nr, addr);
-}
-
-static inline int sync_test_and_set_bit(int nr, volatile void *addr)
-{
-	return test_and_set_bit(nr, addr);
-}
-
-static inline int sync_test_and_clear_bit(int nr, volatile void *addr)
-{
-	return test_and_clear_bit(nr, addr);
-}
-
-static inline int sync_test_and_change_bit(int nr, volatile void *addr)
-{
-	return test_and_change_bit(nr, addr);
-}
-
-static inline int sync_test_bit(int nr, const volatile void *addr)
-{
-	return test_bit(nr, addr);
-}
-
-#define sync_cmpxchg(ptr, old, new)				\
-	((__typeof__(*(ptr)))cmpxchg_acq((ptr), (old), (new)))
-
-#endif /* _ASM_IA64_SYNC_BITOPS_H */
diff --git a/arch/ia64/include/asm/xen/events.h b/arch/ia64/include/asm/xen/events.h
deleted file mode 100644
index baa74c82aa71..000000000000
--- a/arch/ia64/include/asm/xen/events.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/******************************************************************************
- * arch/ia64/include/asm/xen/events.h
- *
- * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
- *                    VA Linux Systems Japan K.K.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- */
-#ifndef _ASM_IA64_XEN_EVENTS_H
-#define _ASM_IA64_XEN_EVENTS_H
-
-enum ipi_vector {
-	XEN_RESCHEDULE_VECTOR,
-	XEN_IPI_VECTOR,
-	XEN_CMCP_VECTOR,
-	XEN_CPEP_VECTOR,
-
-	XEN_NR_IPIS,
-};
-
-static inline int xen_irqs_disabled(struct pt_regs *regs)
-{
-	return !(ia64_psr(regs)->i);
-}
-
-#define irq_ctx_init(cpu)	do { } while (0)
-
-#endif /* _ASM_IA64_XEN_EVENTS_H */
diff --git a/arch/ia64/include/asm/xen/hypercall.h b/arch/ia64/include/asm/xen/hypercall.h
deleted file mode 100644
index ed28bcd5bb85..000000000000
--- a/arch/ia64/include/asm/xen/hypercall.h
+++ /dev/null
@@ -1,265 +0,0 @@
-/******************************************************************************
- * hypercall.h
- *
- * Linux-specific hypervisor handling.
- *
- * Copyright (c) 2002-2004, K A Fraser
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#ifndef _ASM_IA64_XEN_HYPERCALL_H
-#define _ASM_IA64_XEN_HYPERCALL_H
-
-#include <xen/interface/xen.h>
-#include <xen/interface/physdev.h>
-#include <xen/interface/sched.h>
-#include <asm/xen/xcom_hcall.h>
-struct xencomm_handle;
-extern unsigned long __hypercall(unsigned long a1, unsigned long a2,
-				 unsigned long a3, unsigned long a4,
-				 unsigned long a5, unsigned long cmd);
-
-/*
- * Assembler stubs for hyper-calls.
- */
-
-#define _hypercall0(type, name)					\
-({								\
-	long __res;						\
-	__res = __hypercall(0, 0, 0, 0, 0, __HYPERVISOR_##name);\
-	(type)__res;						\
-})
-
-#define _hypercall1(type, name, a1)				\
-({								\
-	long __res;						\
-	__res = __hypercall((unsigned long)a1,			\
-			     0, 0, 0, 0, __HYPERVISOR_##name);	\
-	(type)__res;						\
-})
-
-#define _hypercall2(type, name, a1, a2)				\
-({								\
-	long __res;						\
-	__res = __hypercall((unsigned long)a1,			\
-			    (unsigned long)a2,			\
-			    0, 0, 0, __HYPERVISOR_##name);	\
-	(type)__res;						\
-})
-
-#define _hypercall3(type, name, a1, a2, a3)			\
-({								\
-	long __res;						\
-	__res = __hypercall((unsigned long)a1,			\
-			    (unsigned long)a2,			\
-			    (unsigned long)a3,			\
-			    0, 0, __HYPERVISOR_##name);		\
-	(type)__res;						\
-})
-
-#define _hypercall4(type, name, a1, a2, a3, a4)			\
-({								\
-	long __res;						\
-	__res = __hypercall((unsigned long)a1,			\
-			    (unsigned long)a2,			\
-			    (unsigned long)a3,			\
-			    (unsigned long)a4,			\
-			    0, __HYPERVISOR_##name);		\
-	(type)__res;						\
-})
-
-#define _hypercall5(type, name, a1, a2, a3, a4, a5)		\
-({								\
-	long __res;						\
-	__res = __hypercall((unsigned long)a1,			\
-			    (unsigned long)a2,			\
-			    (unsigned long)a3,			\
-			    (unsigned long)a4,			\
-			    (unsigned long)a5,			\
-			    __HYPERVISOR_##name);		\
-	(type)__res;						\
-})
-
-
-static inline int
-xencomm_arch_hypercall_sched_op(int cmd, struct xencomm_handle *arg)
-{
-	return _hypercall2(int, sched_op, cmd, arg);
-}
-
-static inline long
-HYPERVISOR_set_timer_op(u64 timeout)
-{
-	unsigned long timeout_hi = (unsigned long)(timeout >> 32);
-	unsigned long timeout_lo = (unsigned long)timeout;
-	return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi);
-}
-
-static inline int
-xencomm_arch_hypercall_multicall(struct xencomm_handle *call_list,
-				 int nr_calls)
-{
-	return _hypercall2(int, multicall, call_list, nr_calls);
-}
-
-static inline int
-xencomm_arch_hypercall_memory_op(unsigned int cmd, struct xencomm_handle *arg)
-{
-	return _hypercall2(int, memory_op, cmd, arg);
-}
-
-static inline int
-xencomm_arch_hypercall_event_channel_op(int cmd, struct xencomm_handle *arg)
-{
-	return _hypercall2(int, event_channel_op, cmd, arg);
-}
-
-static inline int
-xencomm_arch_hypercall_xen_version(int cmd, struct xencomm_handle *arg)
-{
-	return _hypercall2(int, xen_version, cmd, arg);
-}
-
-static inline int
-xencomm_arch_hypercall_console_io(int cmd, int count,
-				  struct xencomm_handle *str)
-{
-	return _hypercall3(int, console_io, cmd, count, str);
-}
-
-static inline int
-xencomm_arch_hypercall_physdev_op(int cmd, struct xencomm_handle *arg)
-{
-	return _hypercall2(int, physdev_op, cmd, arg);
-}
-
-static inline int
-xencomm_arch_hypercall_grant_table_op(unsigned int cmd,
-				      struct xencomm_handle *uop,
-				      unsigned int count)
-{
-	return _hypercall3(int, grant_table_op, cmd, uop, count);
-}
-
-int HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count);
-
-extern int xencomm_arch_hypercall_suspend(struct xencomm_handle *arg);
-
-static inline int
-xencomm_arch_hypercall_callback_op(int cmd, struct xencomm_handle *arg)
-{
-	return _hypercall2(int, callback_op, cmd, arg);
-}
-
-static inline long
-xencomm_arch_hypercall_vcpu_op(int cmd, int cpu, void *arg)
-{
-	return _hypercall3(long, vcpu_op, cmd, cpu, arg);
-}
-
-static inline int
-HYPERVISOR_physdev_op(int cmd, void *arg)
-{
-	switch (cmd) {
-	case PHYSDEVOP_eoi:
-		return _hypercall1(int, ia64_fast_eoi,
-				   ((struct physdev_eoi *)arg)->irq);
-	default:
-		return xencomm_hypercall_physdev_op(cmd, arg);
-	}
-}
-
-static inline long
-xencomm_arch_hypercall_opt_feature(struct xencomm_handle *arg)
-{
-	return _hypercall1(long, opt_feature, arg);
-}
-
-/* for balloon driver */
-#define HYPERVISOR_update_va_mapping(va, new_val, flags) (0)
-
-/* Use xencomm to do hypercalls.  */
-#define HYPERVISOR_sched_op xencomm_hypercall_sched_op
-#define HYPERVISOR_event_channel_op xencomm_hypercall_event_channel_op
-#define HYPERVISOR_callback_op xencomm_hypercall_callback_op
-#define HYPERVISOR_multicall xencomm_hypercall_multicall
-#define HYPERVISOR_xen_version xencomm_hypercall_xen_version
-#define HYPERVISOR_console_io xencomm_hypercall_console_io
-#define HYPERVISOR_memory_op xencomm_hypercall_memory_op
-#define HYPERVISOR_suspend xencomm_hypercall_suspend
-#define HYPERVISOR_vcpu_op xencomm_hypercall_vcpu_op
-#define HYPERVISOR_opt_feature xencomm_hypercall_opt_feature
-
-/* to compile gnttab_copy_grant_page() in drivers/xen/core/gnttab.c */
-#define HYPERVISOR_mmu_update(req, count, success_count, domid) ({ BUG(); 0; })
-
-static inline int
-HYPERVISOR_shutdown(
-	unsigned int reason)
-{
-	struct sched_shutdown sched_shutdown = {
-		.reason = reason
-	};
-
-	int rc = HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown);
-
-	return rc;
-}
-
-/* for netfront.c, netback.c */
-#define MULTI_UVMFLAGS_INDEX 0 /* XXX any value */
-
-static inline void
-MULTI_update_va_mapping(
-	struct multicall_entry *mcl, unsigned long va,
-	pte_t new_val, unsigned long flags)
-{
-	mcl->op = __HYPERVISOR_update_va_mapping;
-	mcl->result = 0;
-}
-
-static inline void
-MULTI_grant_table_op(struct multicall_entry *mcl, unsigned int cmd,
-	void *uop, unsigned int count)
-{
-	mcl->op = __HYPERVISOR_grant_table_op;
-	mcl->args[0] = cmd;
-	mcl->args[1] = (unsigned long)uop;
-	mcl->args[2] = count;
-}
-
-static inline void
-MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req,
-		 int count, int *success_count, domid_t domid)
-{
-	mcl->op = __HYPERVISOR_mmu_update;
-	mcl->args[0] = (unsigned long)req;
-	mcl->args[1] = count;
-	mcl->args[2] = (unsigned long)success_count;
-	mcl->args[3] = domid;
-}
-
-#endif /* _ASM_IA64_XEN_HYPERCALL_H */
diff --git a/arch/ia64/include/asm/xen/hypervisor.h b/arch/ia64/include/asm/xen/hypervisor.h
deleted file mode 100644
index 67455c2ed2b1..000000000000
--- a/arch/ia64/include/asm/xen/hypervisor.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/******************************************************************************
- * hypervisor.h
- *
- * Linux-specific hypervisor handling.
- *
- * Copyright (c) 2002-2004, K A Fraser
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#ifndef _ASM_IA64_XEN_HYPERVISOR_H
-#define _ASM_IA64_XEN_HYPERVISOR_H
-
-#include <linux/err.h>
-#include <xen/interface/xen.h>
-#include <xen/interface/version.h>	/* to compile feature.c */
-#include <xen/features.h>		/* to comiple xen-netfront.c */
-#include <xen/xen.h>
-#include <asm/xen/hypercall.h>
-
-#ifdef CONFIG_XEN
-extern struct shared_info *HYPERVISOR_shared_info;
-extern struct start_info *xen_start_info;
-
-void __init xen_setup_vcpu_info_placement(void);
-void force_evtchn_callback(void);
-
-/* for drivers/xen/balloon/balloon.c */
-#ifdef CONFIG_XEN_SCRUB_PAGES
-#define scrub_pages(_p, _n) memset((void *)(_p), 0, (_n) << PAGE_SHIFT)
-#else
-#define scrub_pages(_p, _n) ((void)0)
-#endif
-
-/* For setup_arch() in arch/ia64/kernel/setup.c */
-void xen_ia64_enable_opt_feature(void);
-#endif
-
-#endif /* _ASM_IA64_XEN_HYPERVISOR_H */
diff --git a/arch/ia64/include/asm/xen/inst.h b/arch/ia64/include/asm/xen/inst.h
deleted file mode 100644
index c53a47611208..000000000000
--- a/arch/ia64/include/asm/xen/inst.h
+++ /dev/null
@@ -1,486 +0,0 @@
-/******************************************************************************
- * arch/ia64/include/asm/xen/inst.h
- *
- * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
- *                    VA Linux Systems Japan K.K.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- */
-
-#include <asm/xen/privop.h>
-
-#define ia64_ivt				xen_ivt
-#define DO_SAVE_MIN				XEN_DO_SAVE_MIN
-
-#define __paravirt_switch_to			xen_switch_to
-#define __paravirt_leave_syscall		xen_leave_syscall
-#define __paravirt_work_processed_syscall	xen_work_processed_syscall
-#define __paravirt_leave_kernel			xen_leave_kernel
-#define __paravirt_pending_syscall_end		xen_work_pending_syscall_end
-#define __paravirt_work_processed_syscall_target \
-						xen_work_processed_syscall
-
-#define paravirt_fsyscall_table			xen_fsyscall_table
-#define paravirt_fsys_bubble_down		xen_fsys_bubble_down
-
-#define MOV_FROM_IFA(reg)	\
-	movl reg = XSI_IFA;	\
-	;;			\
-	ld8 reg = [reg]
-
-#define MOV_FROM_ITIR(reg)	\
-	movl reg = XSI_ITIR;	\
-	;;			\
-	ld8 reg = [reg]
-
-#define MOV_FROM_ISR(reg)	\
-	movl reg = XSI_ISR;	\
-	;;			\
-	ld8 reg = [reg]
-
-#define MOV_FROM_IHA(reg)	\
-	movl reg = XSI_IHA;	\
-	;;			\
-	ld8 reg = [reg]
-
-#define MOV_FROM_IPSR(pred, reg)	\
-(pred)	movl reg = XSI_IPSR;		\
-	;;				\
-(pred)	ld8 reg = [reg]
-
-#define MOV_FROM_IIM(reg)	\
-	movl reg = XSI_IIM;	\
-	;;			\
-	ld8 reg = [reg]
-
-#define MOV_FROM_IIP(reg)	\
-	movl reg = XSI_IIP;	\
-	;;			\
-	ld8 reg = [reg]
-
-.macro __MOV_FROM_IVR reg, clob
-	.ifc "\reg", "r8"
-		XEN_HYPER_GET_IVR
-		.exitm
-	.endif
-	.ifc "\clob", "r8"
-		XEN_HYPER_GET_IVR
-		;;
-		mov \reg = r8
-		.exitm
-	.endif
-
-	mov \clob = r8
-	;;
-	XEN_HYPER_GET_IVR
-	;;
-	mov \reg = r8
-	;;
-	mov r8 = \clob
-.endm
-#define MOV_FROM_IVR(reg, clob)	__MOV_FROM_IVR reg, clob
-
-.macro __MOV_FROM_PSR pred, reg, clob
-	.ifc "\reg", "r8"
-		(\pred)	XEN_HYPER_GET_PSR;
-		.exitm
-	.endif
-	.ifc "\clob", "r8"
-		(\pred)	XEN_HYPER_GET_PSR
-		;;
-		(\pred)	mov \reg = r8
-		.exitm
-	.endif
-
-	(\pred)	mov \clob = r8
-	(\pred)	XEN_HYPER_GET_PSR
-	;;
-	(\pred)	mov \reg = r8
-	(\pred)	mov r8 = \clob
-.endm
-#define MOV_FROM_PSR(pred, reg, clob)	__MOV_FROM_PSR pred, reg, clob
-
-/* assuming ar.itc is read with interrupt disabled. */
-#define MOV_FROM_ITC(pred, pred_clob, reg, clob)		\
-(pred)	movl clob = XSI_ITC_OFFSET;				\
-	;;							\
-(pred)	ld8 clob = [clob];					\
-(pred)	mov reg = ar.itc;					\
-	;;							\
-(pred)	add reg = reg, clob;					\
-	;;							\
-(pred)	movl clob = XSI_ITC_LAST;				\
-	;;							\
-(pred)	ld8 clob = [clob];					\
-	;;							\
-(pred)	cmp.geu.unc pred_clob, p0 = clob, reg;			\
-	;;							\
-(pred_clob)	add reg = 1, clob;				\
-	;;							\
-(pred)	movl clob = XSI_ITC_LAST;				\
-	;;							\
-(pred)	st8 [clob] = reg
-
-
-#define MOV_TO_IFA(reg, clob)	\
-	movl clob = XSI_IFA;	\
-	;;			\
-	st8 [clob] = reg	\
-
-#define MOV_TO_ITIR(pred, reg, clob)	\
-(pred)	movl clob = XSI_ITIR;		\
-	;;				\
-(pred)	st8 [clob] = reg
-
-#define MOV_TO_IHA(pred, reg, clob)	\
-(pred)	movl clob = XSI_IHA;		\
-	;;				\
-(pred)	st8 [clob] = reg
-
-#define MOV_TO_IPSR(pred, reg, clob)	\
-(pred)	movl clob = XSI_IPSR;		\
-	;;				\
-(pred)	st8 [clob] = reg;		\
-	;;
-
-#define MOV_TO_IFS(pred, reg, clob)	\
-(pred)	movl clob = XSI_IFS;		\
-	;;				\
-(pred)	st8 [clob] = reg;		\
-	;;
-
-#define MOV_TO_IIP(reg, clob)	\
-	movl clob = XSI_IIP;	\
-	;;			\
-	st8 [clob] = reg
-
-.macro ____MOV_TO_KR kr, reg, clob0, clob1
-	.ifc "\clob0", "r9"
-		.error "clob0 \clob0 must not be r9"
-	.endif
-	.ifc "\clob1", "r8"
-		.error "clob1 \clob1 must not be r8"
-	.endif
-
-	.ifnc "\reg", "r9"
-		.ifnc "\clob1", "r9"
-			mov \clob1 = r9
-		.endif
-		mov r9 = \reg
-	.endif
-	.ifnc "\clob0", "r8"
-		mov \clob0 = r8
-	.endif
-	mov r8 = \kr
-	;;
-	XEN_HYPER_SET_KR
-
-	.ifnc "\reg", "r9"
-		.ifnc "\clob1", "r9"
-			mov r9 = \clob1
-		.endif
-	.endif
-	.ifnc "\clob0", "r8"
-		mov r8 = \clob0
-	.endif
-.endm
-
-.macro __MOV_TO_KR kr, reg, clob0, clob1
-	.ifc "\clob0", "r9"
-		____MOV_TO_KR \kr, \reg, \clob1, \clob0
-		.exitm
-	.endif
-	.ifc "\clob1", "r8"
-		____MOV_TO_KR \kr, \reg, \clob1, \clob0
-		.exitm
-	.endif
-
-	____MOV_TO_KR \kr, \reg, \clob0, \clob1
-.endm
-
-#define MOV_TO_KR(kr, reg, clob0, clob1) \
-	__MOV_TO_KR IA64_KR_ ## kr, reg, clob0, clob1
-
-
-.macro __ITC_I pred, reg, clob
-	.ifc "\reg", "r8"
-		(\pred)	XEN_HYPER_ITC_I
-		.exitm
-	.endif
-	.ifc "\clob", "r8"
-		(\pred)	mov r8 = \reg
-		;;
-		(\pred)	XEN_HYPER_ITC_I
-		.exitm
-	.endif
-
-	(\pred)	mov \clob = r8
-	(\pred)	mov r8 = \reg
-	;;
-	(\pred)	XEN_HYPER_ITC_I
-	;;
-	(\pred)	mov r8 = \clob
-	;;
-.endm
-#define ITC_I(pred, reg, clob)	__ITC_I pred, reg, clob
-
-.macro __ITC_D pred, reg, clob
-	.ifc "\reg", "r8"
-		(\pred)	XEN_HYPER_ITC_D
-		;;
-		.exitm
-	.endif
-	.ifc "\clob", "r8"
-		(\pred)	mov r8 = \reg
-		;;
-		(\pred)	XEN_HYPER_ITC_D
-		;;
-		.exitm
-	.endif
-
-	(\pred)	mov \clob = r8
-	(\pred)	mov r8 = \reg
-	;;
-	(\pred)	XEN_HYPER_ITC_D
-	;;
-	(\pred)	mov r8 = \clob
-	;;
-.endm
-#define ITC_D(pred, reg, clob)	__ITC_D pred, reg, clob
-
-.macro __ITC_I_AND_D pred_i, pred_d, reg, clob
-	.ifc "\reg", "r8"
-		(\pred_i)XEN_HYPER_ITC_I
-		;;
-		(\pred_d)XEN_HYPER_ITC_D
-		;;
-		.exitm
-	.endif
-	.ifc "\clob", "r8"
-		mov r8 = \reg
-		;;
-		(\pred_i)XEN_HYPER_ITC_I
-		;;
-		(\pred_d)XEN_HYPER_ITC_D
-		;;
-		.exitm
-	.endif
-
-	mov \clob = r8
-	mov r8 = \reg
-	;;
-	(\pred_i)XEN_HYPER_ITC_I
-	;;
-	(\pred_d)XEN_HYPER_ITC_D
-	;;
-	mov r8 = \clob
-	;;
-.endm
-#define ITC_I_AND_D(pred_i, pred_d, reg, clob) \
-	__ITC_I_AND_D pred_i, pred_d, reg, clob
-
-.macro __THASH pred, reg0, reg1, clob
-	.ifc "\reg0", "r8"
-		(\pred)	mov r8 = \reg1
-		(\pred)	XEN_HYPER_THASH
-		.exitm
-	.endc
-	.ifc "\reg1", "r8"
-		(\pred)	XEN_HYPER_THASH
-		;;
-		(\pred)	mov \reg0 = r8
-		;;
-		.exitm
-	.endif
-	.ifc "\clob", "r8"
-		(\pred)	mov r8 = \reg1
-		(\pred)	XEN_HYPER_THASH
-		;;
-		(\pred)	mov \reg0 = r8
-		;;
-		.exitm
-	.endif
-
-	(\pred)	mov \clob = r8
-	(\pred)	mov r8 = \reg1
-	(\pred)	XEN_HYPER_THASH
-	;;
-	(\pred)	mov \reg0 = r8
-	(\pred)	mov r8 = \clob
-	;;
-.endm
-#define THASH(pred, reg0, reg1, clob) __THASH pred, reg0, reg1, clob
-
-#define SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(clob0, clob1)	\
-	mov clob0 = 1;						\
-	movl clob1 = XSI_PSR_IC;				\
-	;;							\
-	st4 [clob1] = clob0					\
-	;;
-
-#define SSM_PSR_IC_AND_SRLZ_D(clob0, clob1)	\
-	;;					\
-	srlz.d;					\
-	mov clob1 = 1;				\
-	movl clob0 = XSI_PSR_IC;		\
-	;;					\
-	st4 [clob0] = clob1
-
-#define RSM_PSR_IC(clob)	\
-	movl clob = XSI_PSR_IC;	\
-	;;			\
-	st4 [clob] = r0;	\
-	;;
-
-/* pred will be clobbered */
-#define MASK_TO_PEND_OFS    (-1)
-#define SSM_PSR_I(pred, pred_clob, clob)				\
-(pred)	movl clob = XSI_PSR_I_ADDR					\
-	;;								\
-(pred)	ld8 clob = [clob]						\
-	;;								\
-	/* if (pred) vpsr.i = 1 */					\
-	/* if (pred) (vcpu->vcpu_info->evtchn_upcall_mask)=0 */		\
-(pred)	st1 [clob] = r0, MASK_TO_PEND_OFS				\
-	;;								\
-	/* if (vcpu->vcpu_info->evtchn_upcall_pending) */		\
-(pred)	ld1 clob = [clob]						\
-	;;								\
-(pred)	cmp.ne.unc pred_clob, p0 = clob, r0				\
-	;;								\
-(pred_clob)XEN_HYPER_SSM_I	/* do areal ssm psr.i */
-
-#define RSM_PSR_I(pred, clob0, clob1)	\
-	movl clob0 = XSI_PSR_I_ADDR;	\
-	mov clob1 = 1;			\
-	;;				\
-	ld8 clob0 = [clob0];		\
-	;;				\
-(pred)	st1 [clob0] = clob1
-
-#define RSM_PSR_I_IC(clob0, clob1, clob2)		\
-	movl clob0 = XSI_PSR_I_ADDR;			\
-	movl clob1 = XSI_PSR_IC;			\
-	;;						\
-	ld8 clob0 = [clob0];				\
-	mov clob2 = 1;					\
-	;;						\
-	/* note: clears both vpsr.i and vpsr.ic! */	\
-	st1 [clob0] = clob2;				\
-	st4 [clob1] = r0;				\
-	;;
-
-#define RSM_PSR_DT		\
-	XEN_HYPER_RSM_PSR_DT
-
-#define RSM_PSR_BE_I(clob0, clob1)	\
-	RSM_PSR_I(p0, clob0, clob1);	\
-	rum psr.be
-
-#define SSM_PSR_DT_AND_SRLZ_I	\
-	XEN_HYPER_SSM_PSR_DT
-
-#define BSW_0(clob0, clob1, clob2)			\
-	;;						\
-	/* r16-r31 all now hold bank1 values */		\
-	mov clob2 = ar.unat;				\
-	movl clob0 = XSI_BANK1_R16;			\
-	movl clob1 = XSI_BANK1_R16 + 8;			\
-	;;						\
-.mem.offset 0, 0; st8.spill [clob0] = r16, 16;		\
-.mem.offset 8, 0; st8.spill [clob1] = r17, 16;		\
-	;;						\
-.mem.offset 0, 0; st8.spill [clob0] = r18, 16;		\
-.mem.offset 8, 0; st8.spill [clob1] = r19, 16;		\
-	;;						\
-.mem.offset 0, 0; st8.spill [clob0] = r20, 16;		\
-.mem.offset 8, 0; st8.spill [clob1] = r21, 16;		\
-	;;						\
-.mem.offset 0, 0; st8.spill [clob0] = r22, 16;		\
-.mem.offset 8, 0; st8.spill [clob1] = r23, 16;		\
-	;;						\
-.mem.offset 0, 0; st8.spill [clob0] = r24, 16;		\
-.mem.offset 8, 0; st8.spill [clob1] = r25, 16;		\
-	;;						\
-.mem.offset 0, 0; st8.spill [clob0] = r26, 16;		\
-.mem.offset 8, 0; st8.spill [clob1] = r27, 16;		\
-	;;						\
-.mem.offset 0, 0; st8.spill [clob0] = r28, 16;		\
-.mem.offset 8, 0; st8.spill [clob1] = r29, 16;		\
-	;;						\
-.mem.offset 0, 0; st8.spill [clob0] = r30, 16;		\
-.mem.offset 8, 0; st8.spill [clob1] = r31, 16;		\
-	;;						\
-	mov clob1 = ar.unat;				\
-	movl clob0 = XSI_B1NAT;				\
-	;;						\
-	st8 [clob0] = clob1;				\
-	mov ar.unat = clob2;				\
-	movl clob0 = XSI_BANKNUM;			\
-	;;						\
-	st4 [clob0] = r0
-
-
-	/* FIXME: THIS CODE IS NOT NaT SAFE! */
-#define XEN_BSW_1(clob)			\
-	mov clob = ar.unat;		\
-	movl r30 = XSI_B1NAT;		\
-	;;				\
-	ld8 r30 = [r30];		\
-	mov r31 = 1;			\
-	;;				\
-	mov ar.unat = r30;		\
-	movl r30 = XSI_BANKNUM;		\
-	;;				\
-	st4 [r30] = r31;		\
-	movl r30 = XSI_BANK1_R16;	\
-	movl r31 = XSI_BANK1_R16+8;	\
-	;;				\
-	ld8.fill r16 = [r30], 16;	\
-	ld8.fill r17 = [r31], 16;	\
-	;;				\
-	ld8.fill r18 = [r30], 16;	\
-	ld8.fill r19 = [r31], 16;	\
-	;;				\
-	ld8.fill r20 = [r30], 16;	\
-	ld8.fill r21 = [r31], 16;	\
-	;;				\
-	ld8.fill r22 = [r30], 16;	\
-	ld8.fill r23 = [r31], 16;	\
-	;;				\
-	ld8.fill r24 = [r30], 16;	\
-	ld8.fill r25 = [r31], 16;	\
-	;;				\
-	ld8.fill r26 = [r30], 16;	\
-	ld8.fill r27 = [r31], 16;	\
-	;;				\
-	ld8.fill r28 = [r30], 16;	\
-	ld8.fill r29 = [r31], 16;	\
-	;;				\
-	ld8.fill r30 = [r30];		\
-	ld8.fill r31 = [r31];		\
-	;;				\
-	mov ar.unat = clob
-
-#define BSW_1(clob0, clob1)	XEN_BSW_1(clob1)
-
-
-#define COVER	\
-	XEN_HYPER_COVER
-
-#define RFI			\
-	XEN_HYPER_RFI;		\
-	dv_serialize_data
diff --git a/arch/ia64/include/asm/xen/interface.h b/arch/ia64/include/asm/xen/interface.h
deleted file mode 100644
index e88c5de27410..000000000000
--- a/arch/ia64/include/asm/xen/interface.h
+++ /dev/null
@@ -1,363 +0,0 @@
-/******************************************************************************
- * arch-ia64/hypervisor-if.h
- *
- * Guest OS interface to IA64 Xen.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Copyright by those who contributed. (in alphabetical order)
- *
- * Anthony Xu <anthony.xu@intel.com>
- * Eddie Dong <eddie.dong@intel.com>
- * Fred Yang <fred.yang@intel.com>
- * Kevin Tian <kevin.tian@intel.com>
- * Alex Williamson <alex.williamson@hp.com>
- * Chris Wright <chrisw@sous-sol.org>
- * Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
- * Dietmar Hahn <dietmar.hahn@fujitsu-siemens.com>
- * Hollis Blanchard <hollisb@us.ibm.com>
- * Isaku Yamahata <yamahata@valinux.co.jp>
- * Jan Beulich <jbeulich@novell.com>
- * John Levon <john.levon@sun.com>
- * Kazuhiro Suzuki <kaz@jp.fujitsu.com>
- * Keir Fraser <keir.fraser@citrix.com>
- * Kouya Shimura <kouya@jp.fujitsu.com>
- * Masaki Kanno <kanno.masaki@jp.fujitsu.com>
- * Matt Chapman <matthewc@hp.com>
- * Matthew Chapman <matthewc@hp.com>
- * Samuel Thibault <samuel.thibault@eu.citrix.com>
- * Tomonari Horikoshi <t.horikoshi@jp.fujitsu.com>
- * Tristan Gingold <tgingold@free.fr>
- * Tsunehisa Doi <Doi.Tsunehisa@jp.fujitsu.com>
- * Yutaka Ezaki <yutaka.ezaki@jp.fujitsu.com>
- * Zhang Xin <xing.z.zhang@intel.com>
- * Zhang xiantao <xiantao.zhang@intel.com>
- * dan.magenheimer@hp.com
- * ian.pratt@cl.cam.ac.uk
- * michael.fetterman@cl.cam.ac.uk
- */
-
-#ifndef _ASM_IA64_XEN_INTERFACE_H
-#define _ASM_IA64_XEN_INTERFACE_H
-
-#define __DEFINE_GUEST_HANDLE(name, type)	\
-	typedef struct { type *p; } __guest_handle_ ## name
-
-#define DEFINE_GUEST_HANDLE_STRUCT(name)	\
-	__DEFINE_GUEST_HANDLE(name, struct name)
-#define DEFINE_GUEST_HANDLE(name)	__DEFINE_GUEST_HANDLE(name, name)
-#define GUEST_HANDLE(name)		__guest_handle_ ## name
-#define GUEST_HANDLE_64(name)		GUEST_HANDLE(name)
-#define set_xen_guest_handle(hnd, val)	do { (hnd).p = val; } while (0)
-
-#ifndef __ASSEMBLY__
-/* Explicitly size integers that represent pfns in the public interface
- * with Xen so that we could have one ABI that works for 32 and 64 bit
- * guests. */
-typedef unsigned long xen_pfn_t;
-typedef unsigned long xen_ulong_t;
-/* Guest handles for primitive C types. */
-__DEFINE_GUEST_HANDLE(uchar, unsigned char);
-__DEFINE_GUEST_HANDLE(uint, unsigned int);
-__DEFINE_GUEST_HANDLE(ulong, unsigned long);
-
-DEFINE_GUEST_HANDLE(char);
-DEFINE_GUEST_HANDLE(int);
-DEFINE_GUEST_HANDLE(long);
-DEFINE_GUEST_HANDLE(void);
-DEFINE_GUEST_HANDLE(uint64_t);
-DEFINE_GUEST_HANDLE(uint32_t);
-
-DEFINE_GUEST_HANDLE(xen_pfn_t);
-#define PRI_xen_pfn	"lx"
-#endif
-
-/* Arch specific VIRQs definition */
-#define VIRQ_ITC	VIRQ_ARCH_0	/* V. Virtual itc timer */
-#define VIRQ_MCA_CMC	VIRQ_ARCH_1	/* MCA cmc interrupt */
-#define VIRQ_MCA_CPE	VIRQ_ARCH_2	/* MCA cpe interrupt */
-
-/* Maximum number of virtual CPUs in multi-processor guests. */
-/* keep sizeof(struct shared_page) <= PAGE_SIZE.
- * this is checked in arch/ia64/xen/hypervisor.c. */
-#define MAX_VIRT_CPUS	64
-
-#ifndef __ASSEMBLY__
-
-#define INVALID_MFN	(~0UL)
-
-union vac {
-	unsigned long value;
-	struct {
-		int a_int:1;
-		int a_from_int_cr:1;
-		int a_to_int_cr:1;
-		int a_from_psr:1;
-		int a_from_cpuid:1;
-		int a_cover:1;
-		int a_bsw:1;
-		long reserved:57;
-	};
-};
-
-union vdc {
-	unsigned long value;
-	struct {
-		int d_vmsw:1;
-		int d_extint:1;
-		int d_ibr_dbr:1;
-		int d_pmc:1;
-		int d_to_pmd:1;
-		int d_itm:1;
-		long reserved:58;
-	};
-};
-
-struct mapped_regs {
-	union vac vac;
-	union vdc vdc;
-	unsigned long virt_env_vaddr;
-	unsigned long reserved1[29];
-	unsigned long vhpi;
-	unsigned long reserved2[95];
-	union {
-		unsigned long vgr[16];
-		unsigned long bank1_regs[16];	/* bank1 regs (r16-r31)
-						   when bank0 active */
-	};
-	union {
-		unsigned long vbgr[16];
-		unsigned long bank0_regs[16];	/* bank0 regs (r16-r31)
-						   when bank1 active */
-	};
-	unsigned long vnat;
-	unsigned long vbnat;
-	unsigned long vcpuid[5];
-	unsigned long reserved3[11];
-	unsigned long vpsr;
-	unsigned long vpr;
-	unsigned long reserved4[76];
-	union {
-		unsigned long vcr[128];
-		struct {
-			unsigned long dcr;	/* CR0 */
-			unsigned long itm;
-			unsigned long iva;
-			unsigned long rsv1[5];
-			unsigned long pta;	/* CR8 */
-			unsigned long rsv2[7];
-			unsigned long ipsr;	/* CR16 */
-			unsigned long isr;
-			unsigned long rsv3;
-			unsigned long iip;
-			unsigned long ifa;
-			unsigned long itir;
-			unsigned long iipa;
-			unsigned long ifs;
-			unsigned long iim;	/* CR24 */
-			unsigned long iha;
-			unsigned long rsv4[38];
-			unsigned long lid;	/* CR64 */
-			unsigned long ivr;
-			unsigned long tpr;
-			unsigned long eoi;
-			unsigned long irr[4];
-			unsigned long itv;	/* CR72 */
-			unsigned long pmv;
-			unsigned long cmcv;
-			unsigned long rsv5[5];
-			unsigned long lrr0;	/* CR80 */
-			unsigned long lrr1;
-			unsigned long rsv6[46];
-		};
-	};
-	union {
-		unsigned long reserved5[128];
-		struct {
-			unsigned long precover_ifs;
-			unsigned long unat;	/* not sure if this is needed
-						   until NaT arch is done */
-			int interrupt_collection_enabled; /* virtual psr.ic */
-
-			/* virtual interrupt deliverable flag is
-			 * evtchn_upcall_mask in shared info area now.
-			 * interrupt_mask_addr is the address
-			 * of evtchn_upcall_mask for current vcpu
-			 */
-			unsigned char *interrupt_mask_addr;
-			int pending_interruption;
-			unsigned char vpsr_pp;
-			unsigned char vpsr_dfh;
-			unsigned char hpsr_dfh;
-			unsigned char hpsr_mfh;
-			unsigned long reserved5_1[4];
-			int metaphysical_mode;	/* 1 = use metaphys mapping
-						   0 = use virtual */
-			int banknum;		/* 0 or 1, which virtual
-						   register bank is active */
-			unsigned long rrs[8];	/* region registers */
-			unsigned long krs[8];	/* kernel registers */
-			unsigned long tmp[16];	/* temp registers
-						   (e.g. for hyperprivops) */
-
-			/* itc paravirtualization
-			 * vAR.ITC = mAR.ITC + itc_offset
-			 * itc_last is one which was lastly passed to
-			 * the guest OS in order to prevent it from
-			 * going backwords.
-			 */
-			unsigned long itc_offset;
-			unsigned long itc_last;
-		};
-	};
-};
-
-struct arch_vcpu_info {
-	/* nothing */
-};
-
-/*
- * This structure is used for magic page in domain pseudo physical address
- * space and the result of XENMEM_machine_memory_map.
- * As the XENMEM_machine_memory_map result,
- * xen_memory_map::nr_entries indicates the size in bytes
- * including struct xen_ia64_memmap_info. Not the number of entries.
- */
-struct xen_ia64_memmap_info {
-	uint64_t efi_memmap_size;	/* size of EFI memory map */
-	uint64_t efi_memdesc_size;	/* size of an EFI memory map
-					 * descriptor */
-	uint32_t efi_memdesc_version;	/* memory descriptor version */
-	void *memdesc[0];		/* array of efi_memory_desc_t */
-};
-
-struct arch_shared_info {
-	/* PFN of the start_info page.	*/
-	unsigned long start_info_pfn;
-
-	/* Interrupt vector for event channel.	*/
-	int evtchn_vector;
-
-	/* PFN of memmap_info page */
-	unsigned int memmap_info_num_pages;	/* currently only = 1 case is
-						   supported. */
-	unsigned long memmap_info_pfn;
-
-	uint64_t pad[31];
-};
-
-struct xen_callback {
-	unsigned long ip;
-};
-typedef struct xen_callback xen_callback_t;
-
-#endif /* !__ASSEMBLY__ */
-
-#include <asm/pvclock-abi.h>
-
-/* Size of the shared_info area (this is not related to page size).  */
-#define XSI_SHIFT			14
-#define XSI_SIZE			(1 << XSI_SHIFT)
-/* Log size of mapped_regs area (64 KB - only 4KB is used).  */
-#define XMAPPEDREGS_SHIFT		12
-#define XMAPPEDREGS_SIZE		(1 << XMAPPEDREGS_SHIFT)
-/* Offset of XASI (Xen arch shared info) wrt XSI_BASE.	*/
-#define XMAPPEDREGS_OFS			XSI_SIZE
-
-/* Hyperprivops.  */
-#define HYPERPRIVOP_START		0x1
-#define HYPERPRIVOP_RFI			(HYPERPRIVOP_START + 0x0)
-#define HYPERPRIVOP_RSM_DT		(HYPERPRIVOP_START + 0x1)
-#define HYPERPRIVOP_SSM_DT		(HYPERPRIVOP_START + 0x2)
-#define HYPERPRIVOP_COVER		(HYPERPRIVOP_START + 0x3)
-#define HYPERPRIVOP_ITC_D		(HYPERPRIVOP_START + 0x4)
-#define HYPERPRIVOP_ITC_I		(HYPERPRIVOP_START + 0x5)
-#define HYPERPRIVOP_SSM_I		(HYPERPRIVOP_START + 0x6)
-#define HYPERPRIVOP_GET_IVR		(HYPERPRIVOP_START + 0x7)
-#define HYPERPRIVOP_GET_TPR		(HYPERPRIVOP_START + 0x8)
-#define HYPERPRIVOP_SET_TPR		(HYPERPRIVOP_START + 0x9)
-#define HYPERPRIVOP_EOI			(HYPERPRIVOP_START + 0xa)
-#define HYPERPRIVOP_SET_ITM		(HYPERPRIVOP_START + 0xb)
-#define HYPERPRIVOP_THASH		(HYPERPRIVOP_START + 0xc)
-#define HYPERPRIVOP_PTC_GA		(HYPERPRIVOP_START + 0xd)
-#define HYPERPRIVOP_ITR_D		(HYPERPRIVOP_START + 0xe)
-#define HYPERPRIVOP_GET_RR		(HYPERPRIVOP_START + 0xf)
-#define HYPERPRIVOP_SET_RR		(HYPERPRIVOP_START + 0x10)
-#define HYPERPRIVOP_SET_KR		(HYPERPRIVOP_START + 0x11)
-#define HYPERPRIVOP_FC			(HYPERPRIVOP_START + 0x12)
-#define HYPERPRIVOP_GET_CPUID		(HYPERPRIVOP_START + 0x13)
-#define HYPERPRIVOP_GET_PMD		(HYPERPRIVOP_START + 0x14)
-#define HYPERPRIVOP_GET_EFLAG		(HYPERPRIVOP_START + 0x15)
-#define HYPERPRIVOP_SET_EFLAG		(HYPERPRIVOP_START + 0x16)
-#define HYPERPRIVOP_RSM_BE		(HYPERPRIVOP_START + 0x17)
-#define HYPERPRIVOP_GET_PSR		(HYPERPRIVOP_START + 0x18)
-#define HYPERPRIVOP_SET_RR0_TO_RR4	(HYPERPRIVOP_START + 0x19)
-#define HYPERPRIVOP_MAX			(0x1a)
-
-/* Fast and light hypercalls.  */
-#define __HYPERVISOR_ia64_fast_eoi	__HYPERVISOR_arch_1
-
-/* Xencomm macros.  */
-#define XENCOMM_INLINE_MASK		0xf800000000000000UL
-#define XENCOMM_INLINE_FLAG		0x8000000000000000UL
-
-#ifndef __ASSEMBLY__
-
-/*
- * Optimization features.
- * The hypervisor may do some special optimizations for guests. This hypercall
- * can be used to switch on/of these special optimizations.
- */
-#define __HYPERVISOR_opt_feature	0x700UL
-
-#define XEN_IA64_OPTF_OFF		0x0
-#define XEN_IA64_OPTF_ON		0x1
-
-/*
- * If this feature is switched on, the hypervisor inserts the
- * tlb entries without calling the guests traphandler.
- * This is useful in guests using region 7 for identity mapping
- * like the linux kernel does.
- */
-#define XEN_IA64_OPTF_IDENT_MAP_REG7	1
-
-/* Identity mapping of region 4 addresses in HVM. */
-#define XEN_IA64_OPTF_IDENT_MAP_REG4	2
-
-/* Identity mapping of region 5 addresses in HVM. */
-#define XEN_IA64_OPTF_IDENT_MAP_REG5	3
-
-#define XEN_IA64_OPTF_IDENT_MAP_NOT_SET	 (0)
-
-struct xen_ia64_opt_feature {
-	unsigned long cmd;	/* Which feature */
-	unsigned char on;	/* Switch feature on/off */
-	union {
-		struct {
-			/* The page protection bit mask of the pte.
-			 * This will be or'ed with the pte. */
-			unsigned long pgprot;
-			unsigned long key;	/* A protection key for itir.*/
-		};
-	};
-};
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _ASM_IA64_XEN_INTERFACE_H */
diff --git a/arch/ia64/include/asm/xen/irq.h b/arch/ia64/include/asm/xen/irq.h
deleted file mode 100644
index a90450983003..000000000000
--- a/arch/ia64/include/asm/xen/irq.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/******************************************************************************
- * arch/ia64/include/asm/xen/irq.h
- *
- * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
- *                    VA Linux Systems Japan K.K.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- */
-
-#ifndef _ASM_IA64_XEN_IRQ_H
-#define _ASM_IA64_XEN_IRQ_H
-
-/*
- * The flat IRQ space is divided into two regions:
- *  1. A one-to-one mapping of real physical IRQs. This space is only used
- *     if we have physical device-access privilege. This region is at the
- *     start of the IRQ space so that existing device drivers do not need
- *     to be modified to translate physical IRQ numbers into our IRQ space.
- *  3. A dynamic mapping of inter-domain and Xen-sourced virtual IRQs. These
- *     are bound using the provided bind/unbind functions.
- */
-
-#define XEN_PIRQ_BASE		0
-#define XEN_NR_PIRQS		256
-
-#define XEN_DYNIRQ_BASE		(XEN_PIRQ_BASE + XEN_NR_PIRQS)
-#define XEN_NR_DYNIRQS		(NR_CPUS * 8)
-
-#define XEN_NR_IRQS		(XEN_NR_PIRQS + XEN_NR_DYNIRQS)
-
-#endif /* _ASM_IA64_XEN_IRQ_H */
diff --git a/arch/ia64/include/asm/xen/minstate.h b/arch/ia64/include/asm/xen/minstate.h
deleted file mode 100644
index 00cf03e0cb82..000000000000
--- a/arch/ia64/include/asm/xen/minstate.h
+++ /dev/null
@@ -1,143 +0,0 @@
-
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-/* read ar.itc in advance, and use it before leaving bank 0 */
-#define XEN_ACCOUNT_GET_STAMP		\
-	MOV_FROM_ITC(pUStk, p6, r20, r2);
-#else
-#define XEN_ACCOUNT_GET_STAMP
-#endif
-
-/*
- * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
- * the minimum state necessary that allows us to turn psr.ic back
- * on.
- *
- * Assumed state upon entry:
- *	psr.ic: off
- *	r31:	contains saved predicates (pr)
- *
- * Upon exit, the state is as follows:
- *	psr.ic: off
- *	 r2 = points to &pt_regs.r16
- *	 r8 = contents of ar.ccv
- *	 r9 = contents of ar.csd
- *	r10 = contents of ar.ssd
- *	r11 = FPSR_DEFAULT
- *	r12 = kernel sp (kernel virtual address)
- *	r13 = points to current task_struct (kernel virtual address)
- *	p15 = TRUE if psr.i is set in cr.ipsr
- *	predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
- *		preserved
- * CONFIG_XEN note: p6/p7 are not preserved
- *
- * Note that psr.ic is NOT turned on by this macro.  This is so that
- * we can pass interruption state as arguments to a handler.
- */
-#define XEN_DO_SAVE_MIN(__COVER,SAVE_IFS,EXTRA,WORKAROUND)					\
-	mov r16=IA64_KR(CURRENT);	/* M */							\
-	mov r27=ar.rsc;			/* M */							\
-	mov r20=r1;			/* A */							\
-	mov r25=ar.unat;		/* M */							\
-	MOV_FROM_IPSR(p0,r29);		/* M */							\
-	MOV_FROM_IIP(r28);		/* M */							\
-	mov r21=ar.fpsr;		/* M */							\
-	mov r26=ar.pfs;			/* I */							\
-	__COVER;			/* B;; (or nothing) */					\
-	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16;						\
-	;;											\
-	ld1 r17=[r16];				/* load current->thread.on_ustack flag */	\
-	st1 [r16]=r0;				/* clear current->thread.on_ustack flag */	\
-	adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16						\
-	/* switch from user to kernel RBS: */							\
-	;;											\
-	invala;				/* M */							\
-	/* SAVE_IFS;*/ /* see xen special handling below */					\
-	cmp.eq pKStk,pUStk=r0,r17;		/* are we in kernel mode already? */		\
-	;;											\
-(pUStk)	mov ar.rsc=0;		/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */	\
-	;;											\
-(pUStk)	mov.m r24=ar.rnat;									\
-(pUStk)	addl r22=IA64_RBS_OFFSET,r1;			/* compute base of RBS */		\
-(pKStk) mov r1=sp;					/* get sp  */				\
-	;;											\
-(pUStk) lfetch.fault.excl.nt1 [r22];								\
-(pUStk)	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1;	/* compute base of memory stack */	\
-(pUStk)	mov r23=ar.bspstore;				/* save ar.bspstore */			\
-	;;											\
-(pUStk)	mov ar.bspstore=r22;				/* switch to kernel RBS */		\
-(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1;			/* if in kernel mode, use sp (r12) */	\
-	;;											\
-(pUStk)	mov r18=ar.bsp;										\
-(pUStk)	mov ar.rsc=0x3;		/* set eager mode, pl 0, little-endian, loadrs=0 */		\
-	adds r17=2*L1_CACHE_BYTES,r1;		/* really: biggest cache-line size */		\
-	adds r16=PT(CR_IPSR),r1;								\
-	;;											\
-	lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES;						\
-	st8 [r16]=r29;		/* save cr.ipsr */						\
-	;;											\
-	lfetch.fault.excl.nt1 [r17];								\
-	tbit.nz p15,p0=r29,IA64_PSR_I_BIT;							\
-	mov r29=b0										\
-	;;											\
-	WORKAROUND;										\
-	adds r16=PT(R8),r1;	/* initialize first base pointer */				\
-	adds r17=PT(R9),r1;	/* initialize second base pointer */				\
-(pKStk)	mov r18=r0;		/* make sure r18 isn't NaT */					\
-	;;											\
-.mem.offset 0,0; st8.spill [r16]=r8,16;								\
-.mem.offset 8,0; st8.spill [r17]=r9,16;								\
-        ;;											\
-.mem.offset 0,0; st8.spill [r16]=r10,24;							\
-	movl r8=XSI_PRECOVER_IFS;								\
-.mem.offset 8,0; st8.spill [r17]=r11,24;							\
-        ;;											\
-	/* xen special handling for possibly lazy cover */					\
-	/* SAVE_MIN case in dispatch_ia32_handler: mov r30=r0 */				\
-	ld8 r30=[r8];										\
-(pUStk)	sub r18=r18,r22;	/* r18=RSE.ndirty*8 */						\
-	st8 [r16]=r28,16;	/* save cr.iip */						\
-	;;											\
-	st8 [r17]=r30,16;	/* save cr.ifs */						\
-	mov r8=ar.ccv;										\
-	mov r9=ar.csd;										\
-	mov r10=ar.ssd;										\
-	movl r11=FPSR_DEFAULT;   /* L-unit */							\
-	;;											\
-	st8 [r16]=r25,16;	/* save ar.unat */						\
-	st8 [r17]=r26,16;	/* save ar.pfs */						\
-	shl r18=r18,16;		/* compute ar.rsc to be used for "loadrs" */			\
-	;;											\
-	st8 [r16]=r27,16;	/* save ar.rsc */						\
-(pUStk)	st8 [r17]=r24,16;	/* save ar.rnat */						\
-(pKStk)	adds r17=16,r17;	/* skip over ar_rnat field */					\
-	;;			/* avoid RAW on r16 & r17 */					\
-(pUStk)	st8 [r16]=r23,16;	/* save ar.bspstore */						\
-	st8 [r17]=r31,16;	/* save predicates */						\
-(pKStk)	adds r16=16,r16;	/* skip over ar_bspstore field */				\
-	;;											\
-	st8 [r16]=r29,16;	/* save b0 */							\
-	st8 [r17]=r18,16;	/* save ar.rsc value for "loadrs" */				\
-	cmp.eq pNonSys,pSys=r0,r0	/* initialize pSys=0, pNonSys=1 */			\
-	;;											\
-.mem.offset 0,0; st8.spill [r16]=r20,16;	/* save original r1 */				\
-.mem.offset 8,0; st8.spill [r17]=r12,16;							\
-	adds r12=-16,r1;	/* switch to kernel memory stack (with 16 bytes of scratch) */	\
-	;;											\
-.mem.offset 0,0; st8.spill [r16]=r13,16;							\
-.mem.offset 8,0; st8.spill [r17]=r21,16;	/* save ar.fpsr */				\
-	mov r13=IA64_KR(CURRENT);	/* establish `current' */				\
-	;;											\
-.mem.offset 0,0; st8.spill [r16]=r15,16;							\
-.mem.offset 8,0; st8.spill [r17]=r14,16;							\
-	;;											\
-.mem.offset 0,0; st8.spill [r16]=r2,16;								\
-.mem.offset 8,0; st8.spill [r17]=r3,16;								\
-	XEN_ACCOUNT_GET_STAMP									\
-	adds r2=IA64_PT_REGS_R16_OFFSET,r1;							\
-	;;											\
-	EXTRA;											\
-	movl r1=__gp;		/* establish kernel global pointer */				\
-	;;											\
-	ACCOUNT_SYS_ENTER									\
-	BSW_1(r3,r14);	/* switch back to bank 1 (must be last in insn group) */		\
-	;;
diff --git a/arch/ia64/include/asm/xen/page-coherent.h b/arch/ia64/include/asm/xen/page-coherent.h
deleted file mode 100644
index 96e42f97fa1f..000000000000
--- a/arch/ia64/include/asm/xen/page-coherent.h
+++ /dev/null
@@ -1,38 +0,0 @@
-#ifndef _ASM_IA64_XEN_PAGE_COHERENT_H
-#define _ASM_IA64_XEN_PAGE_COHERENT_H
-
-#include <asm/page.h>
-#include <linux/dma-attrs.h>
-#include <linux/dma-mapping.h>
-
-static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
-		dma_addr_t *dma_handle, gfp_t flags,
-		struct dma_attrs *attrs)
-{
-	void *vstart = (void*)__get_free_pages(flags, get_order(size));
-	*dma_handle = virt_to_phys(vstart);
-	return vstart;
-}
-
-static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
-		void *cpu_addr, dma_addr_t dma_handle,
-		struct dma_attrs *attrs)
-{
-	free_pages((unsigned long) cpu_addr, get_order(size));
-}
-
-static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
-	     unsigned long offset, size_t size, enum dma_data_direction dir,
-	     struct dma_attrs *attrs) { }
-
-static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
-		size_t size, enum dma_data_direction dir,
-		struct dma_attrs *attrs) { }
-
-static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
-		dma_addr_t handle, size_t size, enum dma_data_direction dir) { }
-
-static inline void xen_dma_sync_single_for_device(struct device *hwdev,
-		dma_addr_t handle, size_t size, enum dma_data_direction dir) { }
-
-#endif /* _ASM_IA64_XEN_PAGE_COHERENT_H */
diff --git a/arch/ia64/include/asm/xen/page.h b/arch/ia64/include/asm/xen/page.h
deleted file mode 100644
index 03441a780b5b..000000000000
--- a/arch/ia64/include/asm/xen/page.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/******************************************************************************
- * arch/ia64/include/asm/xen/page.h
- *
- * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
- *                    VA Linux Systems Japan K.K.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- */
-
-#ifndef _ASM_IA64_XEN_PAGE_H
-#define _ASM_IA64_XEN_PAGE_H
-
-#define INVALID_P2M_ENTRY	(~0UL)
-
-static inline unsigned long mfn_to_pfn(unsigned long mfn)
-{
-	return mfn;
-}
-
-static inline unsigned long pfn_to_mfn(unsigned long pfn)
-{
-	return pfn;
-}
-
-#define phys_to_machine_mapping_valid(_x)	(1)
-
-static inline void *mfn_to_virt(unsigned long mfn)
-{
-	return __va(mfn << PAGE_SHIFT);
-}
-
-static inline unsigned long virt_to_mfn(void *virt)
-{
-	return __pa(virt) >> PAGE_SHIFT;
-}
-
-/* for tpmfront.c */
-static inline unsigned long virt_to_machine(void *virt)
-{
-	return __pa(virt);
-}
-
-static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
-{
-	/* nothing */
-}
-
-#define pte_mfn(_x)	pte_pfn(_x)
-#define mfn_pte(_x, _y)	__pte_ma(0)		/* unmodified use */
-#define __pte_ma(_x)	((pte_t) {(_x)})        /* unmodified use */
-
-#endif /* _ASM_IA64_XEN_PAGE_H */
diff --git a/arch/ia64/include/asm/xen/patchlist.h b/arch/ia64/include/asm/xen/patchlist.h
deleted file mode 100644
index eae944e88846..000000000000
--- a/arch/ia64/include/asm/xen/patchlist.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/******************************************************************************
- * arch/ia64/include/asm/xen/patchlist.h
- *
- * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
- *                    VA Linux Systems Japan K.K.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- */
-
-#define __paravirt_start_gate_fsyscall_patchlist		\
-	__xen_start_gate_fsyscall_patchlist
-#define __paravirt_end_gate_fsyscall_patchlist			\
-	__xen_end_gate_fsyscall_patchlist
-#define __paravirt_start_gate_brl_fsys_bubble_down_patchlist	\
-	__xen_start_gate_brl_fsys_bubble_down_patchlist
-#define __paravirt_end_gate_brl_fsys_bubble_down_patchlist	\
-	__xen_end_gate_brl_fsys_bubble_down_patchlist
-#define __paravirt_start_gate_vtop_patchlist			\
-	__xen_start_gate_vtop_patchlist
-#define __paravirt_end_gate_vtop_patchlist			\
-	__xen_end_gate_vtop_patchlist
-#define __paravirt_start_gate_mckinley_e9_patchlist		\
-	__xen_start_gate_mckinley_e9_patchlist
-#define __paravirt_end_gate_mckinley_e9_patchlist		\
-	__xen_end_gate_mckinley_e9_patchlist
diff --git a/arch/ia64/include/asm/xen/privop.h b/arch/ia64/include/asm/xen/privop.h
deleted file mode 100644
index fb4ec5e0b066..000000000000
--- a/arch/ia64/include/asm/xen/privop.h
+++ /dev/null
@@ -1,135 +0,0 @@
-#ifndef _ASM_IA64_XEN_PRIVOP_H
-#define _ASM_IA64_XEN_PRIVOP_H
-
-/*
- * Copyright (C) 2005 Hewlett-Packard Co
- *	Dan Magenheimer <dan.magenheimer@hp.com>
- *
- * Paravirtualizations of privileged operations for Xen/ia64
- *
- *
- * inline privop and paravirt_alt support
- * Copyright (c) 2007 Isaku Yamahata <yamahata at valinux co jp>
- *                    VA Linux Systems Japan K.K.
- *
- */
-
-#ifndef __ASSEMBLY__
-#include <linux/types.h>		/* arch-ia64.h requires uint64_t */
-#endif
-#include <asm/xen/interface.h>
-
-/* At 1 MB, before per-cpu space but still addressable using addl instead
-   of movl. */
-#define XSI_BASE			0xfffffffffff00000
-
-/* Address of mapped regs.  */
-#define XMAPPEDREGS_BASE		(XSI_BASE + XSI_SIZE)
-
-#ifdef __ASSEMBLY__
-#define XEN_HYPER_RFI			break HYPERPRIVOP_RFI
-#define XEN_HYPER_RSM_PSR_DT		break HYPERPRIVOP_RSM_DT
-#define XEN_HYPER_SSM_PSR_DT		break HYPERPRIVOP_SSM_DT
-#define XEN_HYPER_COVER			break HYPERPRIVOP_COVER
-#define XEN_HYPER_ITC_D			break HYPERPRIVOP_ITC_D
-#define XEN_HYPER_ITC_I			break HYPERPRIVOP_ITC_I
-#define XEN_HYPER_SSM_I			break HYPERPRIVOP_SSM_I
-#define XEN_HYPER_GET_IVR		break HYPERPRIVOP_GET_IVR
-#define XEN_HYPER_THASH			break HYPERPRIVOP_THASH
-#define XEN_HYPER_ITR_D			break HYPERPRIVOP_ITR_D
-#define XEN_HYPER_SET_KR		break HYPERPRIVOP_SET_KR
-#define XEN_HYPER_GET_PSR		break HYPERPRIVOP_GET_PSR
-#define XEN_HYPER_SET_RR0_TO_RR4	break HYPERPRIVOP_SET_RR0_TO_RR4
-
-#define XSI_IFS				(XSI_BASE + XSI_IFS_OFS)
-#define XSI_PRECOVER_IFS		(XSI_BASE + XSI_PRECOVER_IFS_OFS)
-#define XSI_IFA				(XSI_BASE + XSI_IFA_OFS)
-#define XSI_ISR				(XSI_BASE + XSI_ISR_OFS)
-#define XSI_IIM				(XSI_BASE + XSI_IIM_OFS)
-#define XSI_ITIR			(XSI_BASE + XSI_ITIR_OFS)
-#define XSI_PSR_I_ADDR			(XSI_BASE + XSI_PSR_I_ADDR_OFS)
-#define XSI_PSR_IC			(XSI_BASE + XSI_PSR_IC_OFS)
-#define XSI_IPSR			(XSI_BASE + XSI_IPSR_OFS)
-#define XSI_IIP				(XSI_BASE + XSI_IIP_OFS)
-#define XSI_B1NAT			(XSI_BASE + XSI_B1NATS_OFS)
-#define XSI_BANK1_R16			(XSI_BASE + XSI_BANK1_R16_OFS)
-#define XSI_BANKNUM			(XSI_BASE + XSI_BANKNUM_OFS)
-#define XSI_IHA				(XSI_BASE + XSI_IHA_OFS)
-#define XSI_ITC_OFFSET			(XSI_BASE + XSI_ITC_OFFSET_OFS)
-#define XSI_ITC_LAST			(XSI_BASE + XSI_ITC_LAST_OFS)
-#endif
-
-#ifndef __ASSEMBLY__
-
-/************************************************/
-/* Instructions paravirtualized for correctness */
-/************************************************/
-
-/* "fc" and "thash" are privilege-sensitive instructions, meaning they
- *  may have different semantics depending on whether they are executed
- *  at PL0 vs PL!=0.  When paravirtualized, these instructions mustn't
- *  be allowed to execute directly, lest incorrect semantics result. */
-extern void xen_fc(void *addr);
-extern unsigned long xen_thash(unsigned long addr);
-
-/* Note that "ttag" and "cover" are also privilege-sensitive; "ttag"
- * is not currently used (though it may be in a long-format VHPT system!)
- * and the semantics of cover only change if psr.ic is off which is very
- * rare (and currently non-existent outside of assembly code */
-
-/* There are also privilege-sensitive registers.  These registers are
- * readable at any privilege level but only writable at PL0. */
-extern unsigned long xen_get_cpuid(int index);
-extern unsigned long xen_get_pmd(int index);
-
-#ifndef ASM_SUPPORTED
-extern unsigned long xen_get_eflag(void);	/* see xen_ia64_getreg */
-extern void xen_set_eflag(unsigned long);	/* see xen_ia64_setreg */
-#endif
-
-/************************************************/
-/* Instructions paravirtualized for performance */
-/************************************************/
-
-/* Xen uses memory-mapped virtual privileged registers for access to many
- * performance-sensitive privileged registers.  Some, like the processor
- * status register (psr), are broken up into multiple memory locations.
- * Others, like "pend", are abstractions based on privileged registers.
- * "Pend" is guaranteed to be set if reading cr.ivr would return a
- * (non-spurious) interrupt. */
-#define XEN_MAPPEDREGS ((struct mapped_regs *)XMAPPEDREGS_BASE)
-
-#define XSI_PSR_I			\
-	(*XEN_MAPPEDREGS->interrupt_mask_addr)
-#define xen_get_virtual_psr_i()		\
-	(!XSI_PSR_I)
-#define xen_set_virtual_psr_i(_val)	\
-	({ XSI_PSR_I = (uint8_t)(_val) ? 0 : 1; })
-#define xen_set_virtual_psr_ic(_val)	\
-	({ XEN_MAPPEDREGS->interrupt_collection_enabled = _val ? 1 : 0; })
-#define xen_get_virtual_pend()		\
-	(*(((uint8_t *)XEN_MAPPEDREGS->interrupt_mask_addr) - 1))
-
-#ifndef ASM_SUPPORTED
-/* Although all privileged operations can be left to trap and will
- * be properly handled by Xen, some are frequent enough that we use
- * hyperprivops for performance. */
-extern unsigned long xen_get_psr(void);
-extern unsigned long xen_get_ivr(void);
-extern unsigned long xen_get_tpr(void);
-extern void xen_hyper_ssm_i(void);
-extern void xen_set_itm(unsigned long);
-extern void xen_set_tpr(unsigned long);
-extern void xen_eoi(unsigned long);
-extern unsigned long xen_get_rr(unsigned long index);
-extern void xen_set_rr(unsigned long index, unsigned long val);
-extern void xen_set_rr0_to_rr4(unsigned long val0, unsigned long val1,
-			       unsigned long val2, unsigned long val3,
-			       unsigned long val4);
-extern void xen_set_kr(unsigned long index, unsigned long val);
-extern void xen_ptcga(unsigned long addr, unsigned long size);
-#endif /* !ASM_SUPPORTED */
-
-#endif /* !__ASSEMBLY__ */
-
-#endif /* _ASM_IA64_XEN_PRIVOP_H */
diff --git a/arch/ia64/include/asm/xen/xcom_hcall.h b/arch/ia64/include/asm/xen/xcom_hcall.h
deleted file mode 100644
index 20b2950c71b6..000000000000
--- a/arch/ia64/include/asm/xen/xcom_hcall.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (C) 2006 Tristan Gingold <tristan.gingold@bull.net>, Bull SAS
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- */
-
-#ifndef _ASM_IA64_XEN_XCOM_HCALL_H
-#define _ASM_IA64_XEN_XCOM_HCALL_H
-
-/* These function creates inline or mini descriptor for the parameters and
-   calls the corresponding xencomm_arch_hypercall_X.
-   Architectures should defines HYPERVISOR_xxx as xencomm_hypercall_xxx unless
-   they want to use their own wrapper.  */
-extern int xencomm_hypercall_console_io(int cmd, int count, char *str);
-
-extern int xencomm_hypercall_event_channel_op(int cmd, void *op);
-
-extern int xencomm_hypercall_xen_version(int cmd, void *arg);
-
-extern int xencomm_hypercall_physdev_op(int cmd, void *op);
-
-extern int xencomm_hypercall_grant_table_op(unsigned int cmd, void *op,
-					    unsigned int count);
-
-extern int xencomm_hypercall_sched_op(int cmd, void *arg);
-
-extern int xencomm_hypercall_multicall(void *call_list, int nr_calls);
-
-extern int xencomm_hypercall_callback_op(int cmd, void *arg);
-
-extern int xencomm_hypercall_memory_op(unsigned int cmd, void *arg);
-
-extern int xencomm_hypercall_suspend(unsigned long srec);
-
-extern long xencomm_hypercall_vcpu_op(int cmd, int cpu, void *arg);
-
-extern long xencomm_hypercall_opt_feature(void *arg);
-
-#endif /* _ASM_IA64_XEN_XCOM_HCALL_H */
diff --git a/arch/ia64/include/asm/xen/xencomm.h b/arch/ia64/include/asm/xen/xencomm.h
deleted file mode 100644
index cded677bebf2..000000000000
--- a/arch/ia64/include/asm/xen/xencomm.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (C) 2006 Hollis Blanchard <hollisb@us.ibm.com>, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- */
-
-#ifndef _ASM_IA64_XEN_XENCOMM_H
-#define _ASM_IA64_XEN_XENCOMM_H
-
-#include <xen/xencomm.h>
-#include <asm/pgtable.h>
-
-/* Must be called before any hypercall.  */
-extern void xencomm_initialize(void);
-extern int xencomm_is_initialized(void);
-
-/* Check if virtual contiguity means physical contiguity
- * where the passed address is a pointer value in virtual address.
- * On ia64, identity mapping area in region 7 or the piece of region 5
- * that is mapped by itr[IA64_TR_KERNEL]/dtr[IA64_TR_KERNEL]
- */
-static inline int xencomm_is_phys_contiguous(unsigned long addr)
-{
-	return (PAGE_OFFSET <= addr &&
-		addr < (PAGE_OFFSET + (1UL << IA64_MAX_PHYS_BITS))) ||
-		(KERNEL_START <= addr &&
-		 addr < KERNEL_START + KERNEL_TR_PAGE_SIZE);
-}
-
-#endif /* _ASM_IA64_XEN_XENCOMM_H */
diff --git a/arch/ia64/include/uapi/asm/break.h b/arch/ia64/include/uapi/asm/break.h
index e90c40ec9edf..f03402039896 100644
--- a/arch/ia64/include/uapi/asm/break.h
+++ b/arch/ia64/include/uapi/asm/break.h
@@ -20,13 +20,4 @@
  */
 #define __IA64_BREAK_SYSCALL		0x100000
 
-/*
- * Xen specific break numbers:
- */
-#define __IA64_XEN_HYPERCALL		0x1000
-/* [__IA64_XEN_HYPERPRIVOP_START, __IA64_XEN_HYPERPRIVOP_MAX] is used
-   for xen hyperprivops */
-#define __IA64_XEN_HYPERPRIVOP_START	0x1
-#define __IA64_XEN_HYPERPRIVOP_MAX	0x1a
-
 #endif /* _ASM_IA64_BREAK_H */
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 59d52e3aef12..bfa19311e09c 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -53,7 +53,6 @@
 #include <asm/numa.h>
 #include <asm/sal.h>
 #include <asm/cyclone.h>
-#include <asm/xen/hypervisor.h>
 
 #define BAD_MADT_ENTRY(entry, end) (                                        \
 		(!entry) || (unsigned long)entry + sizeof(*entry) > end ||  \
@@ -120,8 +119,6 @@ acpi_get_sysname(void)
 			return "uv";
 		else
 			return "sn2";
-	} else if (xen_pv_domain() && !strcmp(hdr->oem_id, "XEN")) {
-		return "xen";
 	}
 
 #ifdef CONFIG_INTEL_IOMMU
diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index 46c9e3007315..60ef83e6db71 100644
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -16,9 +16,6 @@
 #include <asm/sigcontext.h>
 #include <asm/mca.h>
 
-#include <asm/xen/interface.h>
-#include <asm/xen/hypervisor.h>
-
 #include "../kernel/sigframe.h"
 #include "../kernel/fsyscall_gtod_data.h"
 
@@ -290,33 +287,4 @@ void foo(void)
 	DEFINE(IA64_ITC_LASTCYCLE_OFFSET,
 		offsetof (struct itc_jitter_data_t, itc_lastcycle));
 
-#ifdef CONFIG_XEN
-	BLANK();
-
-	DEFINE(XEN_NATIVE_ASM, XEN_NATIVE);
-	DEFINE(XEN_PV_DOMAIN_ASM, XEN_PV_DOMAIN);
-
-#define DEFINE_MAPPED_REG_OFS(sym, field) \
-	DEFINE(sym, (XMAPPEDREGS_OFS + offsetof(struct mapped_regs, field)))
-
-	DEFINE_MAPPED_REG_OFS(XSI_PSR_I_ADDR_OFS, interrupt_mask_addr);
-	DEFINE_MAPPED_REG_OFS(XSI_IPSR_OFS, ipsr);
-	DEFINE_MAPPED_REG_OFS(XSI_IIP_OFS, iip);
-	DEFINE_MAPPED_REG_OFS(XSI_IFS_OFS, ifs);
-	DEFINE_MAPPED_REG_OFS(XSI_PRECOVER_IFS_OFS, precover_ifs);
-	DEFINE_MAPPED_REG_OFS(XSI_ISR_OFS, isr);
-	DEFINE_MAPPED_REG_OFS(XSI_IFA_OFS, ifa);
-	DEFINE_MAPPED_REG_OFS(XSI_IIPA_OFS, iipa);
-	DEFINE_MAPPED_REG_OFS(XSI_IIM_OFS, iim);
-	DEFINE_MAPPED_REG_OFS(XSI_IHA_OFS, iha);
-	DEFINE_MAPPED_REG_OFS(XSI_ITIR_OFS, itir);
-	DEFINE_MAPPED_REG_OFS(XSI_PSR_IC_OFS, interrupt_collection_enabled);
-	DEFINE_MAPPED_REG_OFS(XSI_BANKNUM_OFS, banknum);
-	DEFINE_MAPPED_REG_OFS(XSI_BANK0_R16_OFS, bank0_regs[0]);
-	DEFINE_MAPPED_REG_OFS(XSI_BANK1_R16_OFS, bank1_regs[0]);
-	DEFINE_MAPPED_REG_OFS(XSI_B0NATS_OFS, vbnat);
-	DEFINE_MAPPED_REG_OFS(XSI_B1NATS_OFS, vnat);
-	DEFINE_MAPPED_REG_OFS(XSI_ITC_OFFSET_OFS, itc_offset);
-	DEFINE_MAPPED_REG_OFS(XSI_ITC_LAST_OFS, itc_last);
-#endif /* CONFIG_XEN */
 }
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index 991ca336b8a2..e6f80fcf013b 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -416,8 +416,6 @@ start_ap:
 
 default_setup_hook = 0		// Currently nothing needs to be done.
 
-	.weak xen_setup_hook
-
 	.global hypervisor_type
 hypervisor_type:
 	data8		PARAVIRT_HYPERVISOR_TYPE_DEFAULT
@@ -426,7 +424,6 @@ hypervisor_type:
 
 hypervisor_setup_hooks:
 	data8		default_setup_hook
-	data8		xen_setup_hook
 num_hypervisor_hooks = (. - hypervisor_setup_hooks) / 8
 	.previous
 
diff --git a/arch/ia64/kernel/nr-irqs.c b/arch/ia64/kernel/nr-irqs.c
index ee564575148e..f6769cd54bd9 100644
--- a/arch/ia64/kernel/nr-irqs.c
+++ b/arch/ia64/kernel/nr-irqs.c
@@ -10,15 +10,11 @@
 #include <linux/kbuild.h>
 #include <linux/threads.h>
 #include <asm/native/irq.h>
-#include <asm/xen/irq.h>
 
 void foo(void)
 {
 	union paravirt_nr_irqs_max {
 		char ia64_native_nr_irqs[IA64_NATIVE_NR_IRQS];
-#ifdef CONFIG_XEN
-		char xen_nr_irqs[XEN_NR_IRQS];
-#endif
 	};
 
 	DEFINE(NR_IRQS, sizeof (union paravirt_nr_irqs_max));
diff --git a/arch/ia64/kernel/paravirt_inst.h b/arch/ia64/kernel/paravirt_inst.h
index 64d6d810c64b..1ad7512b5f65 100644
--- a/arch/ia64/kernel/paravirt_inst.h
+++ b/arch/ia64/kernel/paravirt_inst.h
@@ -22,9 +22,6 @@
 
 #ifdef __IA64_ASM_PARAVIRTUALIZED_PVCHECK
 #include <asm/native/pvchk_inst.h>
-#elif defined(__IA64_ASM_PARAVIRTUALIZED_XEN)
-#include <asm/xen/inst.h>
-#include <asm/xen/minstate.h>
 #else
 #include <asm/native/inst.h>
 #endif
diff --git a/arch/ia64/kernel/paravirt_patchlist.h b/arch/ia64/kernel/paravirt_patchlist.h
index 0684aa6c6507..67cffc3643a3 100644
--- a/arch/ia64/kernel/paravirt_patchlist.h
+++ b/arch/ia64/kernel/paravirt_patchlist.h
@@ -20,9 +20,5 @@
  *
  */
 
-#if defined(__IA64_GATE_PARAVIRTUALIZED_XEN)
-#include <asm/xen/patchlist.h>
-#else
 #include <asm/native/patchlist.h>
-#endif
 
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index 0ccb28fab27e..84f8a52ac5ae 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -182,12 +182,6 @@ SECTIONS {
 		__start_gate_section = .;
 		*(.data..gate)
 		__stop_gate_section = .;
-#ifdef CONFIG_XEN
-		. = ALIGN(PAGE_SIZE);
-		__xen_start_gate_section = .;
-		*(.data..gate.xen)
-		__xen_stop_gate_section = .;
-#endif
 	}
 	/*
 	 * make sure the gate page doesn't expose
diff --git a/arch/ia64/xen/Kconfig b/arch/ia64/xen/Kconfig
deleted file mode 100644
index 5d8a06b0ddf7..000000000000
--- a/arch/ia64/xen/Kconfig
+++ /dev/null
@@ -1,25 +0,0 @@
-#
-# This Kconfig describes xen/ia64 options
-#
-
-config XEN
-	bool "Xen hypervisor support"
-	default y
-	depends on PARAVIRT && MCKINLEY && IA64_PAGE_SIZE_16KB
-	select XEN_XENCOMM
-	select NO_IDLE_HZ
-	# followings are required to save/restore.
-	select ARCH_SUSPEND_POSSIBLE
-	select SUSPEND
-	select PM_SLEEP
-	help
-	  Enable Xen hypervisor support.  Resulting kernel runs
-	  both as a guest OS on Xen and natively on hardware.
-
-config XEN_XENCOMM
-	depends on XEN
-	bool
-
-config NO_IDLE_HZ
-	depends on XEN
-	bool
diff --git a/arch/ia64/xen/Makefile b/arch/ia64/xen/Makefile
deleted file mode 100644
index e6f4a0a74228..000000000000
--- a/arch/ia64/xen/Makefile
+++ /dev/null
@@ -1,37 +0,0 @@
-#
-# Makefile for Xen components
-#
-
-obj-y := hypercall.o xenivt.o xensetup.o xen_pv_ops.o irq_xen.o \
-	 hypervisor.o xencomm.o xcom_hcall.o grant-table.o time.o suspend.o \
-	 gate-data.o
-
-obj-$(CONFIG_IA64_GENERIC) += machvec.o
-
-# The gate DSO image is built using a special linker script.
-include $(srctree)/arch/ia64/kernel/Makefile.gate
-
-# tell compiled for xen
-CPPFLAGS_gate.lds += -D__IA64_GATE_PARAVIRTUALIZED_XEN
-AFLAGS_gate.o += -D__IA64_ASM_PARAVIRTUALIZED_XEN -D__IA64_GATE_PARAVIRTUALIZED_XEN
-
-# use same file of native.
-$(obj)/gate.o: $(src)/../kernel/gate.S FORCE
-	$(call if_changed_dep,as_o_S)
-$(obj)/gate.lds: $(src)/../kernel/gate.lds.S FORCE
-	$(call if_changed_dep,cpp_lds_S)
-
-
-AFLAGS_xenivt.o += -D__IA64_ASM_PARAVIRTUALIZED_XEN
-
-# xen multi compile
-ASM_PARAVIRT_MULTI_COMPILE_SRCS = ivt.S entry.S fsys.S
-ASM_PARAVIRT_OBJS = $(addprefix xen-,$(ASM_PARAVIRT_MULTI_COMPILE_SRCS:.S=.o))
-obj-y += $(ASM_PARAVIRT_OBJS)
-define paravirtualized_xen
-AFLAGS_$(1) += -D__IA64_ASM_PARAVIRTUALIZED_XEN
-endef
-$(foreach o,$(ASM_PARAVIRT_OBJS),$(eval $(call paravirtualized_xen,$(o))))
-
-$(obj)/xen-%.o: $(src)/../kernel/%.S FORCE
-	$(call if_changed_dep,as_o_S)
diff --git a/arch/ia64/xen/gate-data.S b/arch/ia64/xen/gate-data.S
deleted file mode 100644
index 6f95b6b32a4e..000000000000
--- a/arch/ia64/xen/gate-data.S
+++ /dev/null
@@ -1,3 +0,0 @@
-	.section .data..gate.xen, "aw"
-
-	.incbin "arch/ia64/xen/gate.so"
diff --git a/arch/ia64/xen/grant-table.c b/arch/ia64/xen/grant-table.c
deleted file mode 100644
index c18281332f84..000000000000
--- a/arch/ia64/xen/grant-table.c
+++ /dev/null
@@ -1,94 +0,0 @@
-/******************************************************************************
- * arch/ia64/xen/grant-table.c
- *
- * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
- *                    VA Linux Systems Japan K.K.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- */
-
-#include <linux/module.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-
-#include <xen/interface/xen.h>
-#include <xen/interface/memory.h>
-#include <xen/grant_table.h>
-
-#include <asm/xen/hypervisor.h>
-
-/****************************************************************************
- * grant table hack
- * cmd: GNTTABOP_xxx
- */
-
-int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
-			   unsigned long max_nr_gframes,
-			   struct grant_entry **__shared)
-{
-	*__shared = __va(frames[0] << PAGE_SHIFT);
-	return 0;
-}
-
-void arch_gnttab_unmap_shared(struct grant_entry *shared,
-			      unsigned long nr_gframes)
-{
-	/* nothing */
-}
-
-static void
-gnttab_map_grant_ref_pre(struct gnttab_map_grant_ref *uop)
-{
-	uint32_t flags;
-
-	flags = uop->flags;
-
-	if (flags & GNTMAP_host_map) {
-		if (flags & GNTMAP_application_map) {
-			printk(KERN_DEBUG
-			       "GNTMAP_application_map is not supported yet: "
-			       "flags 0x%x\n", flags);
-			BUG();
-		}
-		if (flags & GNTMAP_contains_pte) {
-			printk(KERN_DEBUG
-			       "GNTMAP_contains_pte is not supported yet: "
-			       "flags 0x%x\n", flags);
-			BUG();
-		}
-	} else if (flags & GNTMAP_device_map) {
-		printk("GNTMAP_device_map is not supported yet 0x%x\n", flags);
-		BUG();	/* not yet. actually this flag is not used. */
-	} else {
-		BUG();
-	}
-}
-
-int
-HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count)
-{
-	if (cmd == GNTTABOP_map_grant_ref) {
-		unsigned int i;
-		for (i = 0; i < count; i++) {
-			gnttab_map_grant_ref_pre(
-				(struct gnttab_map_grant_ref *)uop + i);
-		}
-	}
-	return xencomm_hypercall_grant_table_op(cmd, uop, count);
-}
-
-EXPORT_SYMBOL(HYPERVISOR_grant_table_op);
diff --git a/arch/ia64/xen/hypercall.S b/arch/ia64/xen/hypercall.S
deleted file mode 100644
index 08847aa12583..000000000000
--- a/arch/ia64/xen/hypercall.S
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Support routines for Xen hypercalls
- *
- * Copyright (C) 2005 Dan Magenheimer <dan.magenheimer@hp.com>
- * Copyright (C) 2008 Yaozu (Eddie) Dong <eddie.dong@intel.com>
- */
-
-#include <asm/asmmacro.h>
-#include <asm/intrinsics.h>
-#include <asm/xen/privop.h>
-
-#ifdef __INTEL_COMPILER
-/*
- * Hypercalls without parameter.
- */
-#define __HCALL0(name,hcall)		\
-	GLOBAL_ENTRY(name);		\
-	break	hcall;			\
-	br.ret.sptk.many rp;		\
-	END(name)
-
-/*
- * Hypercalls with 1 parameter.
- */
-#define __HCALL1(name,hcall)		\
-	GLOBAL_ENTRY(name);		\
-	mov r8=r32;			\
-	break	hcall;			\
-	br.ret.sptk.many rp;		\
-	END(name)
-
-/*
- * Hypercalls with 2 parameters.
- */
-#define __HCALL2(name,hcall)		\
-	GLOBAL_ENTRY(name);		\
-	mov r8=r32;			\
-	mov r9=r33;			\
-	break	hcall;			\
-	br.ret.sptk.many rp;		\
-	END(name)
-
-__HCALL0(xen_get_psr, HYPERPRIVOP_GET_PSR)
-__HCALL0(xen_get_ivr, HYPERPRIVOP_GET_IVR)
-__HCALL0(xen_get_tpr, HYPERPRIVOP_GET_TPR)
-__HCALL0(xen_hyper_ssm_i, HYPERPRIVOP_SSM_I)
-
-__HCALL1(xen_set_tpr, HYPERPRIVOP_SET_TPR)
-__HCALL1(xen_eoi, HYPERPRIVOP_EOI)
-__HCALL1(xen_thash, HYPERPRIVOP_THASH)
-__HCALL1(xen_set_itm, HYPERPRIVOP_SET_ITM)
-__HCALL1(xen_get_rr, HYPERPRIVOP_GET_RR)
-__HCALL1(xen_fc, HYPERPRIVOP_FC)
-__HCALL1(xen_get_cpuid, HYPERPRIVOP_GET_CPUID)
-__HCALL1(xen_get_pmd, HYPERPRIVOP_GET_PMD)
-
-__HCALL2(xen_ptcga, HYPERPRIVOP_PTC_GA)
-__HCALL2(xen_set_rr, HYPERPRIVOP_SET_RR)
-__HCALL2(xen_set_kr, HYPERPRIVOP_SET_KR)
-
-GLOBAL_ENTRY(xen_set_rr0_to_rr4)
-	mov r8=r32
-	mov r9=r33
-	mov r10=r34
-	mov r11=r35
-	mov r14=r36
-	XEN_HYPER_SET_RR0_TO_RR4
-	br.ret.sptk.many rp
-	;;
-END(xen_set_rr0_to_rr4)
-#endif
-
-GLOBAL_ENTRY(xen_send_ipi)
-	mov r14=r32
-	mov r15=r33
-	mov r2=0x400
-	break 0x1000
-	;;
-	br.ret.sptk.many rp
-	;;
-END(xen_send_ipi)
-
-GLOBAL_ENTRY(__hypercall)
-	mov r2=r37
-	break 0x1000
-	br.ret.sptk.many b0
-	;;
-END(__hypercall)
diff --git a/arch/ia64/xen/hypervisor.c b/arch/ia64/xen/hypervisor.c
deleted file mode 100644
index fab62528a80b..000000000000
--- a/arch/ia64/xen/hypervisor.c
+++ /dev/null
@@ -1,97 +0,0 @@
-/******************************************************************************
- * arch/ia64/xen/hypervisor.c
- *
- * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
- *                    VA Linux Systems Japan K.K.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- */
-
-#include <linux/efi.h>
-#include <linux/export.h>
-#include <asm/xen/hypervisor.h>
-#include <asm/xen/privop.h>
-
-#include "irq_xen.h"
-
-struct shared_info *HYPERVISOR_shared_info __read_mostly =
-	(struct shared_info *)XSI_BASE;
-EXPORT_SYMBOL(HYPERVISOR_shared_info);
-
-DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
-
-struct start_info *xen_start_info;
-EXPORT_SYMBOL(xen_start_info);
-
-EXPORT_SYMBOL(xen_domain_type);
-
-EXPORT_SYMBOL(__hypercall);
-
-/* Stolen from arch/x86/xen/enlighten.c */
-/*
- * Flag to determine whether vcpu info placement is available on all
- * VCPUs.  We assume it is to start with, and then set it to zero on
- * the first failure.  This is because it can succeed on some VCPUs
- * and not others, since it can involve hypervisor memory allocation,
- * or because the guest failed to guarantee all the appropriate
- * constraints on all VCPUs (ie buffer can't cross a page boundary).
- *
- * Note that any particular CPU may be using a placed vcpu structure,
- * but we can only optimise if the all are.
- *
- * 0: not available, 1: available
- */
-
-static void __init xen_vcpu_setup(int cpu)
-{
-	/*
-	 * WARNING:
-	 * before changing MAX_VIRT_CPUS,
-	 * check that shared_info fits on a page
-	 */
-	BUILD_BUG_ON(sizeof(struct shared_info) > PAGE_SIZE);
-	per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
-}
-
-void __init xen_setup_vcpu_info_placement(void)
-{
-	int cpu;
-
-	for_each_possible_cpu(cpu)
-		xen_vcpu_setup(cpu);
-}
-
-void
-xen_cpu_init(void)
-{
-	xen_smp_intr_init();
-}
-
-/**************************************************************************
- * opt feature
- */
-void
-xen_ia64_enable_opt_feature(void)
-{
-	/* Enable region 7 identity map optimizations in Xen */
-	struct xen_ia64_opt_feature optf;
-
-	optf.cmd = XEN_IA64_OPTF_IDENT_MAP_REG7;
-	optf.on = XEN_IA64_OPTF_ON;
-	optf.pgprot = pgprot_val(PAGE_KERNEL);
-	optf.key = 0;	/* No key on linux. */
-	HYPERVISOR_opt_feature(&optf);
-}
diff --git a/arch/ia64/xen/irq_xen.c b/arch/ia64/xen/irq_xen.c
deleted file mode 100644
index efb74dafec4d..000000000000
--- a/arch/ia64/xen/irq_xen.c
+++ /dev/null
@@ -1,443 +0,0 @@
-/******************************************************************************
- * arch/ia64/xen/irq_xen.c
- *
- * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
- *                    VA Linux Systems Japan K.K.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- */
-
-#include <linux/cpu.h>
-
-#include <xen/interface/xen.h>
-#include <xen/interface/callback.h>
-#include <xen/events.h>
-
-#include <asm/xen/privop.h>
-
-#include "irq_xen.h"
-
-/***************************************************************************
- * pv_irq_ops
- * irq operations
- */
-
-static int
-xen_assign_irq_vector(int irq)
-{
-	struct physdev_irq irq_op;
-
-	irq_op.irq = irq;
-	if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
-		return -ENOSPC;
-
-	return irq_op.vector;
-}
-
-static void
-xen_free_irq_vector(int vector)
-{
-	struct physdev_irq irq_op;
-
-	if (vector < IA64_FIRST_DEVICE_VECTOR ||
-	    vector > IA64_LAST_DEVICE_VECTOR)
-		return;
-
-	irq_op.vector = vector;
-	if (HYPERVISOR_physdev_op(PHYSDEVOP_free_irq_vector, &irq_op))
-		printk(KERN_WARNING "%s: xen_free_irq_vector fail vector=%d\n",
-		       __func__, vector);
-}
-
-
-static DEFINE_PER_CPU(int, xen_timer_irq) = -1;
-static DEFINE_PER_CPU(int, xen_ipi_irq) = -1;
-static DEFINE_PER_CPU(int, xen_resched_irq) = -1;
-static DEFINE_PER_CPU(int, xen_cmc_irq) = -1;
-static DEFINE_PER_CPU(int, xen_cmcp_irq) = -1;
-static DEFINE_PER_CPU(int, xen_cpep_irq) = -1;
-#define NAME_SIZE	15
-static DEFINE_PER_CPU(char[NAME_SIZE], xen_timer_name);
-static DEFINE_PER_CPU(char[NAME_SIZE], xen_ipi_name);
-static DEFINE_PER_CPU(char[NAME_SIZE], xen_resched_name);
-static DEFINE_PER_CPU(char[NAME_SIZE], xen_cmc_name);
-static DEFINE_PER_CPU(char[NAME_SIZE], xen_cmcp_name);
-static DEFINE_PER_CPU(char[NAME_SIZE], xen_cpep_name);
-#undef NAME_SIZE
-
-struct saved_irq {
-	unsigned int irq;
-	struct irqaction *action;
-};
-/* 16 should be far optimistic value, since only several percpu irqs
- * are registered early.
- */
-#define MAX_LATE_IRQ	16
-static struct saved_irq saved_percpu_irqs[MAX_LATE_IRQ];
-static unsigned short late_irq_cnt;
-static unsigned short saved_irq_cnt;
-static int xen_slab_ready;
-
-#ifdef CONFIG_SMP
-#include <linux/sched.h>
-
-/* Dummy stub. Though we may check XEN_RESCHEDULE_VECTOR before __do_IRQ,
- * it ends up to issue several memory accesses upon percpu data and
- * thus adds unnecessary traffic to other paths.
- */
-static irqreturn_t
-xen_dummy_handler(int irq, void *dev_id)
-{
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t
-xen_resched_handler(int irq, void *dev_id)
-{
-	scheduler_ipi();
-	return IRQ_HANDLED;
-}
-
-static struct irqaction xen_ipi_irqaction = {
-	.handler =	handle_IPI,
-	.flags =	IRQF_DISABLED,
-	.name =		"IPI"
-};
-
-static struct irqaction xen_resched_irqaction = {
-	.handler =	xen_resched_handler,
-	.flags =	IRQF_DISABLED,
-	.name =		"resched"
-};
-
-static struct irqaction xen_tlb_irqaction = {
-	.handler =	xen_dummy_handler,
-	.flags =	IRQF_DISABLED,
-	.name =		"tlb_flush"
-};
-#endif
-
-/*
- * This is xen version percpu irq registration, which needs bind
- * to xen specific evtchn sub-system. One trick here is that xen
- * evtchn binding interface depends on kmalloc because related
- * port needs to be freed at device/cpu down. So we cache the
- * registration on BSP before slab is ready and then deal them
- * at later point. For rest instances happening after slab ready,
- * we hook them to xen evtchn immediately.
- *
- * FIXME: MCA is not supported by far, and thus "nomca" boot param is
- * required.
- */
-static void
-__xen_register_percpu_irq(unsigned int cpu, unsigned int vec,
-			struct irqaction *action, int save)
-{
-	int irq = 0;
-
-	if (xen_slab_ready) {
-		switch (vec) {
-		case IA64_TIMER_VECTOR:
-			snprintf(per_cpu(xen_timer_name, cpu),
-				 sizeof(per_cpu(xen_timer_name, cpu)),
-				 "%s%d", action->name, cpu);
-			irq = bind_virq_to_irqhandler(VIRQ_ITC, cpu,
-				action->handler, action->flags,
-				per_cpu(xen_timer_name, cpu), action->dev_id);
-			per_cpu(xen_timer_irq, cpu) = irq;
-			break;
-		case IA64_IPI_RESCHEDULE:
-			snprintf(per_cpu(xen_resched_name, cpu),
-				 sizeof(per_cpu(xen_resched_name, cpu)),
-				 "%s%d", action->name, cpu);
-			irq = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR, cpu,
-				action->handler, action->flags,
-				per_cpu(xen_resched_name, cpu), action->dev_id);
-			per_cpu(xen_resched_irq, cpu) = irq;
-			break;
-		case IA64_IPI_VECTOR:
-			snprintf(per_cpu(xen_ipi_name, cpu),
-				 sizeof(per_cpu(xen_ipi_name, cpu)),
-				 "%s%d", action->name, cpu);
-			irq = bind_ipi_to_irqhandler(XEN_IPI_VECTOR, cpu,
-				action->handler, action->flags,
-				per_cpu(xen_ipi_name, cpu), action->dev_id);
-			per_cpu(xen_ipi_irq, cpu) = irq;
-			break;
-		case IA64_CMC_VECTOR:
-			snprintf(per_cpu(xen_cmc_name, cpu),
-				 sizeof(per_cpu(xen_cmc_name, cpu)),
-				 "%s%d", action->name, cpu);
-			irq = bind_virq_to_irqhandler(VIRQ_MCA_CMC, cpu,
-						action->handler,
-						action->flags,
-						per_cpu(xen_cmc_name, cpu),
-						action->dev_id);
-			per_cpu(xen_cmc_irq, cpu) = irq;
-			break;
-		case IA64_CMCP_VECTOR:
-			snprintf(per_cpu(xen_cmcp_name, cpu),
-				 sizeof(per_cpu(xen_cmcp_name, cpu)),
-				 "%s%d", action->name, cpu);
-			irq = bind_ipi_to_irqhandler(XEN_CMCP_VECTOR, cpu,
-						action->handler,
-						action->flags,
-						per_cpu(xen_cmcp_name, cpu),
-						action->dev_id);
-			per_cpu(xen_cmcp_irq, cpu) = irq;
-			break;
-		case IA64_CPEP_VECTOR:
-			snprintf(per_cpu(xen_cpep_name, cpu),
-				 sizeof(per_cpu(xen_cpep_name, cpu)),
-				 "%s%d", action->name, cpu);
-			irq = bind_ipi_to_irqhandler(XEN_CPEP_VECTOR, cpu,
-						action->handler,
-						action->flags,
-						per_cpu(xen_cpep_name, cpu),
-						action->dev_id);
-			per_cpu(xen_cpep_irq, cpu) = irq;
-			break;
-		case IA64_CPE_VECTOR:
-		case IA64_MCA_RENDEZ_VECTOR:
-		case IA64_PERFMON_VECTOR:
-		case IA64_MCA_WAKEUP_VECTOR:
-		case IA64_SPURIOUS_INT_VECTOR:
-			/* No need to complain, these aren't supported. */
-			break;
-		default:
-			printk(KERN_WARNING "Percpu irq %d is unsupported "
-			       "by xen!\n", vec);
-			break;
-		}
-		BUG_ON(irq < 0);
-
-		if (irq > 0) {
-			/*
-			 * Mark percpu.  Without this, migrate_irqs() will
-			 * mark the interrupt for migrations and trigger it
-			 * on cpu hotplug.
-			 */
-			irq_set_status_flags(irq, IRQ_PER_CPU);
-		}
-	}
-
-	/* For BSP, we cache registered percpu irqs, and then re-walk
-	 * them when initializing APs
-	 */
-	if (!cpu && save) {
-		BUG_ON(saved_irq_cnt == MAX_LATE_IRQ);
-		saved_percpu_irqs[saved_irq_cnt].irq = vec;
-		saved_percpu_irqs[saved_irq_cnt].action = action;
-		saved_irq_cnt++;
-		if (!xen_slab_ready)
-			late_irq_cnt++;
-	}
-}
-
-static void
-xen_register_percpu_irq(ia64_vector vec, struct irqaction *action)
-{
-	__xen_register_percpu_irq(smp_processor_id(), vec, action, 1);
-}
-
-static void
-xen_bind_early_percpu_irq(void)
-{
-	int i;
-
-	xen_slab_ready = 1;
-	/* There's no race when accessing this cached array, since only
-	 * BSP will face with such step shortly
-	 */
-	for (i = 0; i < late_irq_cnt; i++)
-		__xen_register_percpu_irq(smp_processor_id(),
-					  saved_percpu_irqs[i].irq,
-					  saved_percpu_irqs[i].action, 0);
-}
-
-/* FIXME: There's no obvious point to check whether slab is ready. So
- * a hack is used here by utilizing a late time hook.
- */
-
-#ifdef CONFIG_HOTPLUG_CPU
-static int unbind_evtchn_callback(struct notifier_block *nfb,
-				  unsigned long action, void *hcpu)
-{
-	unsigned int cpu = (unsigned long)hcpu;
-
-	if (action == CPU_DEAD) {
-		/* Unregister evtchn.  */
-		if (per_cpu(xen_cpep_irq, cpu) >= 0) {
-			unbind_from_irqhandler(per_cpu(xen_cpep_irq, cpu),
-					       NULL);
-			per_cpu(xen_cpep_irq, cpu) = -1;
-		}
-		if (per_cpu(xen_cmcp_irq, cpu) >= 0) {
-			unbind_from_irqhandler(per_cpu(xen_cmcp_irq, cpu),
-					       NULL);
-			per_cpu(xen_cmcp_irq, cpu) = -1;
-		}
-		if (per_cpu(xen_cmc_irq, cpu) >= 0) {
-			unbind_from_irqhandler(per_cpu(xen_cmc_irq, cpu), NULL);
-			per_cpu(xen_cmc_irq, cpu) = -1;
-		}
-		if (per_cpu(xen_ipi_irq, cpu) >= 0) {
-			unbind_from_irqhandler(per_cpu(xen_ipi_irq, cpu), NULL);
-			per_cpu(xen_ipi_irq, cpu) = -1;
-		}
-		if (per_cpu(xen_resched_irq, cpu) >= 0) {
-			unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu),
-					       NULL);
-			per_cpu(xen_resched_irq, cpu) = -1;
-		}
-		if (per_cpu(xen_timer_irq, cpu) >= 0) {
-			unbind_from_irqhandler(per_cpu(xen_timer_irq, cpu),
-					       NULL);
-			per_cpu(xen_timer_irq, cpu) = -1;
-		}
-	}
-	return NOTIFY_OK;
-}
-
-static struct notifier_block unbind_evtchn_notifier = {
-	.notifier_call = unbind_evtchn_callback,
-	.priority = 0
-};
-#endif
-
-void xen_smp_intr_init_early(unsigned int cpu)
-{
-#ifdef CONFIG_SMP
-	unsigned int i;
-
-	for (i = 0; i < saved_irq_cnt; i++)
-		__xen_register_percpu_irq(cpu, saved_percpu_irqs[i].irq,
-					  saved_percpu_irqs[i].action, 0);
-#endif
-}
-
-void xen_smp_intr_init(void)
-{
-#ifdef CONFIG_SMP
-	unsigned int cpu = smp_processor_id();
-	struct callback_register event = {
-		.type = CALLBACKTYPE_event,
-		.address = { .ip = (unsigned long)&xen_event_callback },
-	};
-
-	if (cpu == 0) {
-		/* Initialization was already done for boot cpu.  */
-#ifdef CONFIG_HOTPLUG_CPU
-		/* Register the notifier only once.  */
-		register_cpu_notifier(&unbind_evtchn_notifier);
-#endif
-		return;
-	}
-
-	/* This should be piggyback when setup vcpu guest context */
-	BUG_ON(HYPERVISOR_callback_op(CALLBACKOP_register, &event));
-#endif /* CONFIG_SMP */
-}
-
-void __init
-xen_irq_init(void)
-{
-	struct callback_register event = {
-		.type = CALLBACKTYPE_event,
-		.address = { .ip = (unsigned long)&xen_event_callback },
-	};
-
-	xen_init_IRQ();
-	BUG_ON(HYPERVISOR_callback_op(CALLBACKOP_register, &event));
-	late_time_init = xen_bind_early_percpu_irq;
-}
-
-void
-xen_platform_send_ipi(int cpu, int vector, int delivery_mode, int redirect)
-{
-#ifdef CONFIG_SMP
-	/* TODO: we need to call vcpu_up here */
-	if (unlikely(vector == ap_wakeup_vector)) {
-		/* XXX
-		 * This should be in __cpu_up(cpu) in ia64 smpboot.c
-		 * like x86. But don't want to modify it,
-		 * keep it untouched.
-		 */
-		xen_smp_intr_init_early(cpu);
-
-		xen_send_ipi(cpu, vector);
-		/* vcpu_prepare_and_up(cpu); */
-		return;
-	}
-#endif
-
-	switch (vector) {
-	case IA64_IPI_VECTOR:
-		xen_send_IPI_one(cpu, XEN_IPI_VECTOR);
-		break;
-	case IA64_IPI_RESCHEDULE:
-		xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
-		break;
-	case IA64_CMCP_VECTOR:
-		xen_send_IPI_one(cpu, XEN_CMCP_VECTOR);
-		break;
-	case IA64_CPEP_VECTOR:
-		xen_send_IPI_one(cpu, XEN_CPEP_VECTOR);
-		break;
-	case IA64_TIMER_VECTOR: {
-		/* this is used only once by check_sal_cache_flush()
-		   at boot time */
-		static int used = 0;
-		if (!used) {
-			xen_send_ipi(cpu, IA64_TIMER_VECTOR);
-			used = 1;
-			break;
-		}
-		/* fallthrough */
-	}
-	default:
-		printk(KERN_WARNING "Unsupported IPI type 0x%x\n",
-		       vector);
-		notify_remote_via_irq(0); /* defaults to 0 irq */
-		break;
-	}
-}
-
-static void __init
-xen_register_ipi(void)
-{
-#ifdef CONFIG_SMP
-	register_percpu_irq(IA64_IPI_VECTOR, &xen_ipi_irqaction);
-	register_percpu_irq(IA64_IPI_RESCHEDULE, &xen_resched_irqaction);
-	register_percpu_irq(IA64_IPI_LOCAL_TLB_FLUSH, &xen_tlb_irqaction);
-#endif
-}
-
-static void
-xen_resend_irq(unsigned int vector)
-{
-	(void)resend_irq_on_evtchn(vector);
-}
-
-const struct pv_irq_ops xen_irq_ops __initconst = {
-	.register_ipi = xen_register_ipi,
-
-	.assign_irq_vector = xen_assign_irq_vector,
-	.free_irq_vector = xen_free_irq_vector,
-	.register_percpu_irq = xen_register_percpu_irq,
-
-	.resend_irq = xen_resend_irq,
-};
diff --git a/arch/ia64/xen/irq_xen.h b/arch/ia64/xen/irq_xen.h
deleted file mode 100644
index 1778517b90fe..000000000000
--- a/arch/ia64/xen/irq_xen.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/******************************************************************************
- * arch/ia64/xen/irq_xen.h
- *
- * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
- *                    VA Linux Systems Japan K.K.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- */
-
-#ifndef IRQ_XEN_H
-#define IRQ_XEN_H
-
-extern void (*late_time_init)(void);
-extern char xen_event_callback;
-void __init xen_init_IRQ(void);
-
-extern const struct pv_irq_ops xen_irq_ops __initconst;
-extern void xen_smp_intr_init(void);
-extern void xen_send_ipi(int cpu, int vec);
-
-#endif /* IRQ_XEN_H */
diff --git a/arch/ia64/xen/machvec.c b/arch/ia64/xen/machvec.c
deleted file mode 100644
index 4ad588a7c279..000000000000
--- a/arch/ia64/xen/machvec.c
+++ /dev/null
@@ -1,4 +0,0 @@
-#define MACHVEC_PLATFORM_NAME           xen
-#define MACHVEC_PLATFORM_HEADER         <asm/machvec_xen.h>
-#include <asm/machvec_init.h>
-
diff --git a/arch/ia64/xen/suspend.c b/arch/ia64/xen/suspend.c
deleted file mode 100644
index 419c8620945a..000000000000
--- a/arch/ia64/xen/suspend.c
+++ /dev/null
@@ -1,59 +0,0 @@
-/******************************************************************************
- * arch/ia64/xen/suspend.c
- *
- * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
- *                    VA Linux Systems Japan K.K.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- * suspend/resume
- */
-
-#include <xen/xen-ops.h>
-#include <asm/xen/hypervisor.h>
-#include "time.h"
-
-void
-xen_mm_pin_all(void)
-{
-	/* nothing */
-}
-
-void
-xen_mm_unpin_all(void)
-{
-	/* nothing */
-}
-
-void
-xen_arch_pre_suspend()
-{
-	/* nothing */
-}
-
-void
-xen_arch_post_suspend(int suspend_cancelled)
-{
-	if (suspend_cancelled)
-		return;
-
-	xen_ia64_enable_opt_feature();
-	/* add more if necessary */
-}
-
-void xen_arch_resume(void)
-{
-	xen_timer_resume_on_aps();
-}
diff --git a/arch/ia64/xen/time.c b/arch/ia64/xen/time.c
deleted file mode 100644
index 1f8244a78bee..000000000000
--- a/arch/ia64/xen/time.c
+++ /dev/null
@@ -1,257 +0,0 @@
-/******************************************************************************
- * arch/ia64/xen/time.c
- *
- * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
- *                    VA Linux Systems Japan K.K.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- */
-
-#include <linux/delay.h>
-#include <linux/kernel_stat.h>
-#include <linux/posix-timers.h>
-#include <linux/irq.h>
-#include <linux/clocksource.h>
-
-#include <asm/timex.h>
-
-#include <asm/xen/hypervisor.h>
-
-#include <xen/interface/vcpu.h>
-
-#include "../kernel/fsyscall_gtod_data.h"
-
-static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
-static DEFINE_PER_CPU(unsigned long, xen_stolen_time);
-static DEFINE_PER_CPU(unsigned long, xen_blocked_time);
-
-/* taken from i386/kernel/time-xen.c */
-static void xen_init_missing_ticks_accounting(int cpu)
-{
-	struct vcpu_register_runstate_memory_area area;
-	struct vcpu_runstate_info *runstate = &per_cpu(xen_runstate, cpu);
-	int rc;
-
-	memset(runstate, 0, sizeof(*runstate));
-
-	area.addr.v = runstate;
-	rc = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu,
-				&area);
-	WARN_ON(rc && rc != -ENOSYS);
-
-	per_cpu(xen_blocked_time, cpu) = runstate->time[RUNSTATE_blocked];
-	per_cpu(xen_stolen_time, cpu) = runstate->time[RUNSTATE_runnable]
-					    + runstate->time[RUNSTATE_offline];
-}
-
-/*
- * Runstate accounting
- */
-/* stolen from arch/x86/xen/time.c */
-static void get_runstate_snapshot(struct vcpu_runstate_info *res)
-{
-	u64 state_time;
-	struct vcpu_runstate_info *state;
-
-	BUG_ON(preemptible());
-
-	state = &__get_cpu_var(xen_runstate);
-
-	/*
-	 * The runstate info is always updated by the hypervisor on
-	 * the current CPU, so there's no need to use anything
-	 * stronger than a compiler barrier when fetching it.
-	 */
-	do {
-		state_time = state->state_entry_time;
-		rmb();
-		*res = *state;
-		rmb();
-	} while (state->state_entry_time != state_time);
-}
-
-#define NS_PER_TICK (1000000000LL/HZ)
-
-static unsigned long
-consider_steal_time(unsigned long new_itm)
-{
-	unsigned long stolen, blocked;
-	unsigned long delta_itm = 0, stolentick = 0;
-	int cpu = smp_processor_id();
-	struct vcpu_runstate_info runstate;
-	struct task_struct *p = current;
-
-	get_runstate_snapshot(&runstate);
-
-	/*
-	 * Check for vcpu migration effect
-	 * In this case, itc value is reversed.
-	 * This causes huge stolen value.
-	 * This function just checks and reject this effect.
-	 */
-	if (!time_after_eq(runstate.time[RUNSTATE_blocked],
-			   per_cpu(xen_blocked_time, cpu)))
-		blocked = 0;
-
-	if (!time_after_eq(runstate.time[RUNSTATE_runnable] +
-			   runstate.time[RUNSTATE_offline],
-			   per_cpu(xen_stolen_time, cpu)))
-		stolen = 0;
-
-	if (!time_after(delta_itm + new_itm, ia64_get_itc()))
-		stolentick = ia64_get_itc() - new_itm;
-
-	do_div(stolentick, NS_PER_TICK);
-	stolentick++;
-
-	do_div(stolen, NS_PER_TICK);
-
-	if (stolen > stolentick)
-		stolen = stolentick;
-
-	stolentick -= stolen;
-	do_div(blocked, NS_PER_TICK);
-
-	if (blocked > stolentick)
-		blocked = stolentick;
-
-	if (stolen > 0 || blocked > 0) {
-		account_steal_ticks(stolen);
-		account_idle_ticks(blocked);
-		run_local_timers();
-
-		rcu_check_callbacks(cpu, user_mode(get_irq_regs()));
-
-		scheduler_tick();
-		run_posix_cpu_timers(p);
-		delta_itm += local_cpu_data->itm_delta * (stolen + blocked);
-
-		if (cpu == time_keeper_id)
-			xtime_update(stolen + blocked);
-
-		local_cpu_data->itm_next = delta_itm + new_itm;
-
-		per_cpu(xen_stolen_time, cpu) += NS_PER_TICK * stolen;
-		per_cpu(xen_blocked_time, cpu) += NS_PER_TICK * blocked;
-	}
-	return delta_itm;
-}
-
-static int xen_do_steal_accounting(unsigned long *new_itm)
-{
-	unsigned long delta_itm;
-	delta_itm = consider_steal_time(*new_itm);
-	*new_itm += delta_itm;
-	if (time_after(*new_itm, ia64_get_itc()) && delta_itm)
-		return 1;
-
-	return 0;
-}
-
-static void xen_itc_jitter_data_reset(void)
-{
-	u64 lcycle, ret;
-
-	do {
-		lcycle = itc_jitter_data.itc_lastcycle;
-		ret = cmpxchg(&itc_jitter_data.itc_lastcycle, lcycle, 0);
-	} while (unlikely(ret != lcycle));
-}
-
-/* based on xen_sched_clock() in arch/x86/xen/time.c. */
-/*
- * This relies on HAVE_UNSTABLE_SCHED_CLOCK. If it can't be defined,
- * something similar logic should be implemented here.
- */
-/*
- * Xen sched_clock implementation.  Returns the number of unstolen
- * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED
- * states.
- */
-static unsigned long long xen_sched_clock(void)
-{
-	struct vcpu_runstate_info runstate;
-
-	unsigned long long now;
-	unsigned long long offset;
-	unsigned long long ret;
-
-	/*
-	 * Ideally sched_clock should be called on a per-cpu basis
-	 * anyway, so preempt should already be disabled, but that's
-	 * not current practice at the moment.
-	 */
-	preempt_disable();
-
-	/*
-	 * both ia64_native_sched_clock() and xen's runstate are
-	 * based on mAR.ITC. So difference of them makes sense.
-	 */
-	now = ia64_native_sched_clock();
-
-	get_runstate_snapshot(&runstate);
-
-	WARN_ON(runstate.state != RUNSTATE_running);
-
-	offset = 0;
-	if (now > runstate.state_entry_time)
-		offset = now - runstate.state_entry_time;
-	ret = runstate.time[RUNSTATE_blocked] +
-		runstate.time[RUNSTATE_running] +
-		offset;
-
-	preempt_enable();
-
-	return ret;
-}
-
-struct pv_time_ops xen_time_ops __initdata = {
-	.init_missing_ticks_accounting	= xen_init_missing_ticks_accounting,
-	.do_steal_accounting		= xen_do_steal_accounting,
-	.clocksource_resume		= xen_itc_jitter_data_reset,
-	.sched_clock			= xen_sched_clock,
-};
-
-/* Called after suspend, to resume time.  */
-static void xen_local_tick_resume(void)
-{
-	/* Just trigger a tick.  */
-	ia64_cpu_local_tick();
-	touch_softlockup_watchdog();
-}
-
-void
-xen_timer_resume(void)
-{
-	unsigned int cpu;
-
-	xen_local_tick_resume();
-
-	for_each_online_cpu(cpu)
-		xen_init_missing_ticks_accounting(cpu);
-}
-
-static void ia64_cpu_local_tick_fn(void *unused)
-{
-	xen_local_tick_resume();
-	xen_init_missing_ticks_accounting(smp_processor_id());
-}
-
-void
-xen_timer_resume_on_aps(void)
-{
-	smp_call_function(&ia64_cpu_local_tick_fn, NULL, 1);
-}
diff --git a/arch/ia64/xen/time.h b/arch/ia64/xen/time.h
deleted file mode 100644
index f98d7e1a42f0..000000000000
--- a/arch/ia64/xen/time.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/******************************************************************************
- * arch/ia64/xen/time.h
- *
- * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
- *                    VA Linux Systems Japan K.K.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- */
-
-extern struct pv_time_ops xen_time_ops __initdata;
-void xen_timer_resume_on_aps(void);
diff --git a/arch/ia64/xen/xcom_hcall.c b/arch/ia64/xen/xcom_hcall.c
deleted file mode 100644
index ccaf7431f7c8..000000000000
--- a/arch/ia64/xen/xcom_hcall.c
+++ /dev/null
@@ -1,441 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- *
- *          Tristan Gingold <tristan.gingold@bull.net>
- *
- *          Copyright (c) 2007
- *          Isaku Yamahata <yamahata at valinux co jp>
- *                          VA Linux Systems Japan K.K.
- *          consolidate mini and inline version.
- */
-
-#include <linux/module.h>
-#include <xen/interface/xen.h>
-#include <xen/interface/memory.h>
-#include <xen/interface/grant_table.h>
-#include <xen/interface/callback.h>
-#include <xen/interface/vcpu.h>
-#include <asm/xen/hypervisor.h>
-#include <asm/xen/xencomm.h>
-
-/* Xencomm notes:
- * This file defines hypercalls to be used by xencomm.  The hypercalls simply
- * create inlines or mini descriptors for pointers and then call the raw arch
- * hypercall xencomm_arch_hypercall_XXX
- *
- * If the arch wants to directly use these hypercalls, simply define macros
- * in asm/xen/hypercall.h, eg:
- *  #define HYPERVISOR_sched_op xencomm_hypercall_sched_op
- *
- * The arch may also define HYPERVISOR_xxx as a function and do more operations
- * before/after doing the hypercall.
- *
- * Note: because only inline or mini descriptors are created these functions
- * must only be called with in kernel memory parameters.
- */
-
-int
-xencomm_hypercall_console_io(int cmd, int count, char *str)
-{
-	/* xen early printk uses console io hypercall before
-	 * xencomm initialization. In that case, we just ignore it.
-	 */
-	if (!xencomm_is_initialized())
-		return 0;
-
-	return xencomm_arch_hypercall_console_io
-		(cmd, count, xencomm_map_no_alloc(str, count));
-}
-EXPORT_SYMBOL_GPL(xencomm_hypercall_console_io);
-
-int
-xencomm_hypercall_event_channel_op(int cmd, void *op)
-{
-	struct xencomm_handle *desc;
-	desc = xencomm_map_no_alloc(op, sizeof(struct evtchn_op));
-	if (desc == NULL)
-		return -EINVAL;
-
-	return xencomm_arch_hypercall_event_channel_op(cmd, desc);
-}
-EXPORT_SYMBOL_GPL(xencomm_hypercall_event_channel_op);
-
-int
-xencomm_hypercall_xen_version(int cmd, void *arg)
-{
-	struct xencomm_handle *desc;
-	unsigned int argsize;
-
-	switch (cmd) {
-	case XENVER_version:
-		/* do not actually pass an argument */
-		return xencomm_arch_hypercall_xen_version(cmd, 0);
-	case XENVER_extraversion:
-		argsize = sizeof(struct xen_extraversion);
-		break;
-	case XENVER_compile_info:
-		argsize = sizeof(struct xen_compile_info);
-		break;
-	case XENVER_capabilities:
-		argsize = sizeof(struct xen_capabilities_info);
-		break;
-	case XENVER_changeset:
-		argsize = sizeof(struct xen_changeset_info);
-		break;
-	case XENVER_platform_parameters:
-		argsize = sizeof(struct xen_platform_parameters);
-		break;
-	case XENVER_get_features:
-		argsize = (arg == NULL) ? 0 : sizeof(struct xen_feature_info);
-		break;
-
-	default:
-		printk(KERN_DEBUG
-		       "%s: unknown version op %d\n", __func__, cmd);
-		return -ENOSYS;
-	}
-
-	desc = xencomm_map_no_alloc(arg, argsize);
-	if (desc == NULL)
-		return -EINVAL;
-
-	return xencomm_arch_hypercall_xen_version(cmd, desc);
-}
-EXPORT_SYMBOL_GPL(xencomm_hypercall_xen_version);
-
-int
-xencomm_hypercall_physdev_op(int cmd, void *op)
-{
-	unsigned int argsize;
-
-	switch (cmd) {
-	case PHYSDEVOP_apic_read:
-	case PHYSDEVOP_apic_write:
-		argsize = sizeof(struct physdev_apic);
-		break;
-	case PHYSDEVOP_alloc_irq_vector:
-	case PHYSDEVOP_free_irq_vector:
-		argsize = sizeof(struct physdev_irq);
-		break;
-	case PHYSDEVOP_irq_status_query:
-		argsize = sizeof(struct physdev_irq_status_query);
-		break;
-
-	default:
-		printk(KERN_DEBUG
-		       "%s: unknown physdev op %d\n", __func__, cmd);
-		return -ENOSYS;
-	}
-
-	return xencomm_arch_hypercall_physdev_op
-		(cmd, xencomm_map_no_alloc(op, argsize));
-}
-
-static int
-xencommize_grant_table_op(struct xencomm_mini **xc_area,
-			  unsigned int cmd, void *op, unsigned int count,
-			  struct xencomm_handle **desc)
-{
-	struct xencomm_handle *desc1;
-	unsigned int argsize;
-
-	switch (cmd) {
-	case GNTTABOP_map_grant_ref:
-		argsize = sizeof(struct gnttab_map_grant_ref);
-		break;
-	case GNTTABOP_unmap_grant_ref:
-		argsize = sizeof(struct gnttab_unmap_grant_ref);
-		break;
-	case GNTTABOP_setup_table:
-	{
-		struct gnttab_setup_table *setup = op;
-
-		argsize = sizeof(*setup);
-
-		if (count != 1)
-			return -EINVAL;
-		desc1 = __xencomm_map_no_alloc
-			(xen_guest_handle(setup->frame_list),
-			 setup->nr_frames *
-			 sizeof(*xen_guest_handle(setup->frame_list)),
-			 *xc_area);
-		if (desc1 == NULL)
-			return -EINVAL;
-		(*xc_area)++;
-		set_xen_guest_handle(setup->frame_list, (void *)desc1);
-		break;
-	}
-	case GNTTABOP_dump_table:
-		argsize = sizeof(struct gnttab_dump_table);
-		break;
-	case GNTTABOP_transfer:
-		argsize = sizeof(struct gnttab_transfer);
-		break;
-	case GNTTABOP_copy:
-		argsize = sizeof(struct gnttab_copy);
-		break;
-	case GNTTABOP_query_size:
-		argsize = sizeof(struct gnttab_query_size);
-		break;
-	default:
-		printk(KERN_DEBUG "%s: unknown hypercall grant table op %d\n",
-		       __func__, cmd);
-		BUG();
-	}
-
-	*desc = __xencomm_map_no_alloc(op, count * argsize, *xc_area);
-	if (*desc == NULL)
-		return -EINVAL;
-	(*xc_area)++;
-
-	return 0;
-}
-
-int
-xencomm_hypercall_grant_table_op(unsigned int cmd, void *op,
-				 unsigned int count)
-{
-	int rc;
-	struct xencomm_handle *desc;
-	XENCOMM_MINI_ALIGNED(xc_area, 2);
-
-	rc = xencommize_grant_table_op(&xc_area, cmd, op, count, &desc);
-	if (rc)
-		return rc;
-
-	return xencomm_arch_hypercall_grant_table_op(cmd, desc, count);
-}
-EXPORT_SYMBOL_GPL(xencomm_hypercall_grant_table_op);
-
-int
-xencomm_hypercall_sched_op(int cmd, void *arg)
-{
-	struct xencomm_handle *desc;
-	unsigned int argsize;
-
-	switch (cmd) {
-	case SCHEDOP_yield:
-	case SCHEDOP_block:
-		argsize = 0;
-		break;
-	case SCHEDOP_shutdown:
-		argsize = sizeof(struct sched_shutdown);
-		break;
-	case SCHEDOP_poll:
-	{
-		struct sched_poll *poll = arg;
-		struct xencomm_handle *ports;
-
-		argsize = sizeof(struct sched_poll);
-		ports = xencomm_map_no_alloc(xen_guest_handle(poll->ports),
-				     sizeof(*xen_guest_handle(poll->ports)));
-
-		set_xen_guest_handle(poll->ports, (void *)ports);
-		break;
-	}
-	default:
-		printk(KERN_DEBUG "%s: unknown sched op %d\n", __func__, cmd);
-		return -ENOSYS;
-	}
-
-	desc = xencomm_map_no_alloc(arg, argsize);
-	if (desc == NULL)
-		return -EINVAL;
-
-	return xencomm_arch_hypercall_sched_op(cmd, desc);
-}
-EXPORT_SYMBOL_GPL(xencomm_hypercall_sched_op);
-
-int
-xencomm_hypercall_multicall(void *call_list, int nr_calls)
-{
-	int rc;
-	int i;
-	struct multicall_entry *mce;
-	struct xencomm_handle *desc;
-	XENCOMM_MINI_ALIGNED(xc_area, nr_calls * 2);
-
-	for (i = 0; i < nr_calls; i++) {
-		mce = (struct multicall_entry *)call_list + i;
-
-		switch (mce->op) {
-		case __HYPERVISOR_update_va_mapping:
-		case __HYPERVISOR_mmu_update:
-			/* No-op on ia64.  */
-			break;
-		case __HYPERVISOR_grant_table_op:
-			rc = xencommize_grant_table_op
-				(&xc_area,
-				 mce->args[0], (void *)mce->args[1],
-				 mce->args[2], &desc);
-			if (rc)
-				return rc;
-			mce->args[1] = (unsigned long)desc;
-			break;
-		case __HYPERVISOR_memory_op:
-		default:
-			printk(KERN_DEBUG
-			       "%s: unhandled multicall op entry op %lu\n",
-			       __func__, mce->op);
-			return -ENOSYS;
-		}
-	}
-
-	desc = xencomm_map_no_alloc(call_list,
-				    nr_calls * sizeof(struct multicall_entry));
-	if (desc == NULL)
-		return -EINVAL;
-
-	return xencomm_arch_hypercall_multicall(desc, nr_calls);
-}
-EXPORT_SYMBOL_GPL(xencomm_hypercall_multicall);
-
-int
-xencomm_hypercall_callback_op(int cmd, void *arg)
-{
-	unsigned int argsize;
-	switch (cmd) {
-	case CALLBACKOP_register:
-		argsize = sizeof(struct callback_register);
-		break;
-	case CALLBACKOP_unregister:
-		argsize = sizeof(struct callback_unregister);
-		break;
-	default:
-		printk(KERN_DEBUG
-		       "%s: unknown callback op %d\n", __func__, cmd);
-		return -ENOSYS;
-	}
-
-	return xencomm_arch_hypercall_callback_op
-		(cmd, xencomm_map_no_alloc(arg, argsize));
-}
-
-static int
-xencommize_memory_reservation(struct xencomm_mini *xc_area,
-			      struct xen_memory_reservation *mop)
-{
-	struct xencomm_handle *desc;
-
-	desc = __xencomm_map_no_alloc(xen_guest_handle(mop->extent_start),
-			mop->nr_extents *
-			sizeof(*xen_guest_handle(mop->extent_start)),
-			xc_area);
-	if (desc == NULL)
-		return -EINVAL;
-
-	set_xen_guest_handle(mop->extent_start, (void *)desc);
-	return 0;
-}
-
-int
-xencomm_hypercall_memory_op(unsigned int cmd, void *arg)
-{
-	GUEST_HANDLE(xen_pfn_t) extent_start_va[2] = { {NULL}, {NULL} };
-	struct xen_memory_reservation *xmr = NULL;
-	int rc;
-	struct xencomm_handle *desc;
-	unsigned int argsize;
-	XENCOMM_MINI_ALIGNED(xc_area, 2);
-
-	switch (cmd) {
-	case XENMEM_increase_reservation:
-	case XENMEM_decrease_reservation:
-	case XENMEM_populate_physmap:
-		xmr = (struct xen_memory_reservation *)arg;
-		set_xen_guest_handle(extent_start_va[0],
-				     xen_guest_handle(xmr->extent_start));
-
-		argsize = sizeof(*xmr);
-		rc = xencommize_memory_reservation(xc_area, xmr);
-		if (rc)
-			return rc;
-		xc_area++;
-		break;
-
-	case XENMEM_maximum_ram_page:
-		argsize = 0;
-		break;
-
-	case XENMEM_add_to_physmap:
-		argsize = sizeof(struct xen_add_to_physmap);
-		break;
-
-	default:
-		printk(KERN_DEBUG "%s: unknown memory op %d\n", __func__, cmd);
-		return -ENOSYS;
-	}
-
-	desc = xencomm_map_no_alloc(arg, argsize);
-	if (desc == NULL)
-		return -EINVAL;
-
-	rc = xencomm_arch_hypercall_memory_op(cmd, desc);
-
-	switch (cmd) {
-	case XENMEM_increase_reservation:
-	case XENMEM_decrease_reservation:
-	case XENMEM_populate_physmap:
-		set_xen_guest_handle(xmr->extent_start,
-				     xen_guest_handle(extent_start_va[0]));
-		break;
-	}
-
-	return rc;
-}
-EXPORT_SYMBOL_GPL(xencomm_hypercall_memory_op);
-
-int
-xencomm_hypercall_suspend(unsigned long srec)
-{
-	struct sched_shutdown arg;
-
-	arg.reason = SHUTDOWN_suspend;
-
-	return xencomm_arch_hypercall_sched_op(
-		SCHEDOP_shutdown, xencomm_map_no_alloc(&arg, sizeof(arg)));
-}
-
-long
-xencomm_hypercall_vcpu_op(int cmd, int cpu, void *arg)
-{
-	unsigned int argsize;
-	switch (cmd) {
-	case VCPUOP_register_runstate_memory_area: {
-		struct vcpu_register_runstate_memory_area *area =
-			(struct vcpu_register_runstate_memory_area *)arg;
-		argsize = sizeof(*arg);
-		set_xen_guest_handle(area->addr.h,
-		     (void *)xencomm_map_no_alloc(area->addr.v,
-						  sizeof(area->addr.v)));
-		break;
-	}
-
-	default:
-		printk(KERN_DEBUG "%s: unknown vcpu op %d\n", __func__, cmd);
-		return -ENOSYS;
-	}
-
-	return xencomm_arch_hypercall_vcpu_op(cmd, cpu,
-					xencomm_map_no_alloc(arg, argsize));
-}
-
-long
-xencomm_hypercall_opt_feature(void *arg)
-{
-	return xencomm_arch_hypercall_opt_feature(
-		xencomm_map_no_alloc(arg,
-				     sizeof(struct xen_ia64_opt_feature)));
-}
diff --git a/arch/ia64/xen/xen_pv_ops.c b/arch/ia64/xen/xen_pv_ops.c
deleted file mode 100644
index 3e8d350fdf39..000000000000
--- a/arch/ia64/xen/xen_pv_ops.c
+++ /dev/null
@@ -1,1141 +0,0 @@
-/******************************************************************************
- * arch/ia64/xen/xen_pv_ops.c
- *
- * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
- *                    VA Linux Systems Japan K.K.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- */
-
-#include <linux/console.h>
-#include <linux/irq.h>
-#include <linux/kernel.h>
-#include <linux/pm.h>
-#include <linux/unistd.h>
-
-#include <asm/xen/hypervisor.h>
-#include <asm/xen/xencomm.h>
-#include <asm/xen/privop.h>
-
-#include "irq_xen.h"
-#include "time.h"
-
-/***************************************************************************
- * general info
- */
-static struct pv_info xen_info __initdata = {
-	.kernel_rpl = 2,	/* or 1: determin at runtime */
-	.paravirt_enabled = 1,
-	.name = "Xen/ia64",
-};
-
-#define IA64_RSC_PL_SHIFT	2
-#define IA64_RSC_PL_BIT_SIZE	2
-#define IA64_RSC_PL_MASK	\
-	(((1UL << IA64_RSC_PL_BIT_SIZE) - 1) << IA64_RSC_PL_SHIFT)
-
-static void __init
-xen_info_init(void)
-{
-	/* Xenified Linux/ia64 may run on pl = 1 or 2.
-	 * determin at run time. */
-	unsigned long rsc = ia64_getreg(_IA64_REG_AR_RSC);
-	unsigned int rpl = (rsc & IA64_RSC_PL_MASK) >> IA64_RSC_PL_SHIFT;
-	xen_info.kernel_rpl = rpl;
-}
-
-/***************************************************************************
- * pv_init_ops
- * initialization hooks.
- */
-
-static void
-xen_panic_hypercall(struct unw_frame_info *info, void *arg)
-{
-	current->thread.ksp = (__u64)info->sw - 16;
-	HYPERVISOR_shutdown(SHUTDOWN_crash);
-	/* we're never actually going to get here... */
-}
-
-static int
-xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
-{
-	unw_init_running(xen_panic_hypercall, NULL);
-	/* we're never actually going to get here... */
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block xen_panic_block = {
-	xen_panic_event, NULL, 0 /* try to go last */
-};
-
-static void xen_pm_power_off(void)
-{
-	local_irq_disable();
-	HYPERVISOR_shutdown(SHUTDOWN_poweroff);
-}
-
-static void __init
-xen_banner(void)
-{
-	printk(KERN_INFO
-	       "Running on Xen! pl = %d start_info_pfn=0x%lx nr_pages=%ld "
-	       "flags=0x%x\n",
-	       xen_info.kernel_rpl,
-	       HYPERVISOR_shared_info->arch.start_info_pfn,
-	       xen_start_info->nr_pages, xen_start_info->flags);
-}
-
-static int __init
-xen_reserve_memory(struct rsvd_region *region)
-{
-	region->start = (unsigned long)__va(
-		(HYPERVISOR_shared_info->arch.start_info_pfn << PAGE_SHIFT));
-	region->end   = region->start + PAGE_SIZE;
-	return 1;
-}
-
-static void __init
-xen_arch_setup_early(void)
-{
-	struct shared_info *s;
-	BUG_ON(!xen_pv_domain());
-
-	s = HYPERVISOR_shared_info;
-	xen_start_info = __va(s->arch.start_info_pfn << PAGE_SHIFT);
-
-	/* Must be done before any hypercall.  */
-	xencomm_initialize();
-
-	xen_setup_features();
-	/* Register a call for panic conditions. */
-	atomic_notifier_chain_register(&panic_notifier_list,
-				       &xen_panic_block);
-	pm_power_off = xen_pm_power_off;
-
-	xen_ia64_enable_opt_feature();
-}
-
-static void __init
-xen_arch_setup_console(char **cmdline_p)
-{
-	add_preferred_console("xenboot", 0, NULL);
-	add_preferred_console("tty", 0, NULL);
-	/* use hvc_xen */
-	add_preferred_console("hvc", 0, NULL);
-
-#if !defined(CONFIG_VT) || !defined(CONFIG_DUMMY_CONSOLE)
-	conswitchp = NULL;
-#endif
-}
-
-static int __init
-xen_arch_setup_nomca(void)
-{
-	return 1;
-}
-
-static void __init
-xen_post_smp_prepare_boot_cpu(void)
-{
-	xen_setup_vcpu_info_placement();
-}
-
-#ifdef ASM_SUPPORTED
-static unsigned long __init_or_module
-xen_patch_bundle(void *sbundle, void *ebundle, unsigned long type);
-#endif
-static void __init
-xen_patch_branch(unsigned long tag, unsigned long type);
-
-static const struct pv_init_ops xen_init_ops __initconst = {
-	.banner = xen_banner,
-
-	.reserve_memory = xen_reserve_memory,
-
-	.arch_setup_early = xen_arch_setup_early,
-	.arch_setup_console = xen_arch_setup_console,
-	.arch_setup_nomca = xen_arch_setup_nomca,
-
-	.post_smp_prepare_boot_cpu = xen_post_smp_prepare_boot_cpu,
-#ifdef ASM_SUPPORTED
-	.patch_bundle = xen_patch_bundle,
-#endif
-	.patch_branch = xen_patch_branch,
-};
-
-/***************************************************************************
- * pv_fsys_data
- * addresses for fsys
- */
-
-extern unsigned long xen_fsyscall_table[NR_syscalls];
-extern char xen_fsys_bubble_down[];
-struct pv_fsys_data xen_fsys_data __initdata = {
-	.fsyscall_table = (unsigned long *)xen_fsyscall_table,
-	.fsys_bubble_down = (void *)xen_fsys_bubble_down,
-};
-
-/***************************************************************************
- * pv_patchdata
- * patchdata addresses
- */
-
-#define DECLARE(name)							\
-	extern unsigned long __xen_start_gate_##name##_patchlist[];	\
-	extern unsigned long __xen_end_gate_##name##_patchlist[]
-
-DECLARE(fsyscall);
-DECLARE(brl_fsys_bubble_down);
-DECLARE(vtop);
-DECLARE(mckinley_e9);
-
-extern unsigned long __xen_start_gate_section[];
-
-#define ASSIGN(name)							\
-	.start_##name##_patchlist =					\
-		(unsigned long)__xen_start_gate_##name##_patchlist,	\
-	.end_##name##_patchlist =					\
-		(unsigned long)__xen_end_gate_##name##_patchlist
-
-static struct pv_patchdata xen_patchdata __initdata = {
-	ASSIGN(fsyscall),
-	ASSIGN(brl_fsys_bubble_down),
-	ASSIGN(vtop),
-	ASSIGN(mckinley_e9),
-
-	.gate_section = (void*)__xen_start_gate_section,
-};
-
-/***************************************************************************
- * pv_cpu_ops
- * intrinsics hooks.
- */
-
-#ifndef ASM_SUPPORTED
-static void
-xen_set_itm_with_offset(unsigned long val)
-{
-	/* ia64_cpu_local_tick() calls this with interrupt enabled. */
-	/* WARN_ON(!irqs_disabled()); */
-	xen_set_itm(val - XEN_MAPPEDREGS->itc_offset);
-}
-
-static unsigned long
-xen_get_itm_with_offset(void)
-{
-	/* unused at this moment */
-	printk(KERN_DEBUG "%s is called.\n", __func__);
-
-	WARN_ON(!irqs_disabled());
-	return ia64_native_getreg(_IA64_REG_CR_ITM) +
-		XEN_MAPPEDREGS->itc_offset;
-}
-
-/* ia64_set_itc() is only called by
- * cpu_init() with ia64_set_itc(0) and ia64_sync_itc().
- * So XEN_MAPPEDRESG->itc_offset cal be considered as almost constant.
- */
-static void
-xen_set_itc(unsigned long val)
-{
-	unsigned long mitc;
-
-	WARN_ON(!irqs_disabled());
-	mitc = ia64_native_getreg(_IA64_REG_AR_ITC);
-	XEN_MAPPEDREGS->itc_offset = val - mitc;
-	XEN_MAPPEDREGS->itc_last = val;
-}
-
-static unsigned long
-xen_get_itc(void)
-{
-	unsigned long res;
-	unsigned long itc_offset;
-	unsigned long itc_last;
-	unsigned long ret_itc_last;
-
-	itc_offset = XEN_MAPPEDREGS->itc_offset;
-	do {
-		itc_last = XEN_MAPPEDREGS->itc_last;
-		res = ia64_native_getreg(_IA64_REG_AR_ITC);
-		res += itc_offset;
-		if (itc_last >= res)
-			res = itc_last + 1;
-		ret_itc_last = cmpxchg(&XEN_MAPPEDREGS->itc_last,
-				       itc_last, res);
-	} while (unlikely(ret_itc_last != itc_last));
-	return res;
-
-#if 0
-	/* ia64_itc_udelay() calls ia64_get_itc() with interrupt enabled.
-	   Should it be paravirtualized instead? */
-	WARN_ON(!irqs_disabled());
-	itc_offset = XEN_MAPPEDREGS->itc_offset;
-	itc_last = XEN_MAPPEDREGS->itc_last;
-	res = ia64_native_getreg(_IA64_REG_AR_ITC);
-	res += itc_offset;
-	if (itc_last >= res)
-		res = itc_last + 1;
-	XEN_MAPPEDREGS->itc_last = res;
-	return res;
-#endif
-}
-
-static void xen_setreg(int regnum, unsigned long val)
-{
-	switch (regnum) {
-	case _IA64_REG_AR_KR0 ... _IA64_REG_AR_KR7:
-		xen_set_kr(regnum - _IA64_REG_AR_KR0, val);
-		break;
-	case _IA64_REG_AR_ITC:
-		xen_set_itc(val);
-		break;
-	case _IA64_REG_CR_TPR:
-		xen_set_tpr(val);
-		break;
-	case _IA64_REG_CR_ITM:
-		xen_set_itm_with_offset(val);
-		break;
-	case _IA64_REG_CR_EOI:
-		xen_eoi(val);
-		break;
-	default:
-		ia64_native_setreg_func(regnum, val);
-		break;
-	}
-}
-
-static unsigned long xen_getreg(int regnum)
-{
-	unsigned long res;
-
-	switch (regnum) {
-	case _IA64_REG_PSR:
-		res = xen_get_psr();
-		break;
-	case _IA64_REG_AR_ITC:
-		res = xen_get_itc();
-		break;
-	case _IA64_REG_CR_ITM:
-		res = xen_get_itm_with_offset();
-		break;
-	case _IA64_REG_CR_IVR:
-		res = xen_get_ivr();
-		break;
-	case _IA64_REG_CR_TPR:
-		res = xen_get_tpr();
-		break;
-	default:
-		res = ia64_native_getreg_func(regnum);
-		break;
-	}
-	return res;
-}
-
-/* turning on interrupts is a bit more complicated.. write to the
- * memory-mapped virtual psr.i bit first (to avoid race condition),
- * then if any interrupts were pending, we have to execute a hyperprivop
- * to ensure the pending interrupt gets delivered; else we're done! */
-static void
-xen_ssm_i(void)
-{
-	int old = xen_get_virtual_psr_i();
-	xen_set_virtual_psr_i(1);
-	barrier();
-	if (!old && xen_get_virtual_pend())
-		xen_hyper_ssm_i();
-}
-
-/* turning off interrupts can be paravirtualized simply by writing
- * to a memory-mapped virtual psr.i bit (implemented as a 16-bit bool) */
-static void
-xen_rsm_i(void)
-{
-	xen_set_virtual_psr_i(0);
-	barrier();
-}
-
-static unsigned long
-xen_get_psr_i(void)
-{
-	return xen_get_virtual_psr_i() ? IA64_PSR_I : 0;
-}
-
-static void
-xen_intrin_local_irq_restore(unsigned long mask)
-{
-	if (mask & IA64_PSR_I)
-		xen_ssm_i();
-	else
-		xen_rsm_i();
-}
-#else
-#define __DEFINE_FUNC(name, code)					\
-	extern const char xen_ ## name ## _direct_start[];		\
-	extern const char xen_ ## name ## _direct_end[];		\
-	asm (".align 32\n"						\
-	     ".proc xen_" #name "\n"					\
-	     "xen_" #name ":\n"						\
-	     "xen_" #name "_direct_start:\n"				\
-	     code							\
-	     "xen_" #name "_direct_end:\n"				\
-	     "br.cond.sptk.many b6\n"					\
-	     ".endp xen_" #name "\n")
-
-#define DEFINE_VOID_FUNC0(name, code)		\
-	extern void				\
-	xen_ ## name (void);			\
-	__DEFINE_FUNC(name, code)
-
-#define DEFINE_VOID_FUNC1(name, code)		\
-	extern void				\
-	xen_ ## name (unsigned long arg);	\
-	__DEFINE_FUNC(name, code)
-
-#define DEFINE_VOID_FUNC1_VOID(name, code)	\
-	extern void				\
-	xen_ ## name (void *arg);		\
-	__DEFINE_FUNC(name, code)
-
-#define DEFINE_VOID_FUNC2(name, code)		\
-	extern void				\
-	xen_ ## name (unsigned long arg0,	\
-		      unsigned long arg1);	\
-	__DEFINE_FUNC(name, code)
-
-#define DEFINE_FUNC0(name, code)		\
-	extern unsigned long			\
-	xen_ ## name (void);			\
-	__DEFINE_FUNC(name, code)
-
-#define DEFINE_FUNC1(name, type, code)		\
-	extern unsigned long			\
-	xen_ ## name (type arg);		\
-	__DEFINE_FUNC(name, code)
-
-#define XEN_PSR_I_ADDR_ADDR     (XSI_BASE + XSI_PSR_I_ADDR_OFS)
-
-/*
- * static void xen_set_itm_with_offset(unsigned long val)
- *        xen_set_itm(val - XEN_MAPPEDREGS->itc_offset);
- */
-/* 2 bundles */
-DEFINE_VOID_FUNC1(set_itm_with_offset,
-		  "mov r2 = " __stringify(XSI_BASE) " + "
-		  __stringify(XSI_ITC_OFFSET_OFS) "\n"
-		  ";;\n"
-		  "ld8 r3 = [r2]\n"
-		  ";;\n"
-		  "sub r8 = r8, r3\n"
-		  "break " __stringify(HYPERPRIVOP_SET_ITM) "\n");
-
-/*
- * static unsigned long xen_get_itm_with_offset(void)
- *    return ia64_native_getreg(_IA64_REG_CR_ITM) + XEN_MAPPEDREGS->itc_offset;
- */
-/* 2 bundles */
-DEFINE_FUNC0(get_itm_with_offset,
-	     "mov r2 = " __stringify(XSI_BASE) " + "
-	     __stringify(XSI_ITC_OFFSET_OFS) "\n"
-	     ";;\n"
-	     "ld8 r3 = [r2]\n"
-	     "mov r8 = cr.itm\n"
-	     ";;\n"
-	     "add r8 = r8, r2\n");
-
-/*
- * static void xen_set_itc(unsigned long val)
- *	unsigned long mitc;
- *
- *	WARN_ON(!irqs_disabled());
- *	mitc = ia64_native_getreg(_IA64_REG_AR_ITC);
- *	XEN_MAPPEDREGS->itc_offset = val - mitc;
- *	XEN_MAPPEDREGS->itc_last = val;
- */
-/* 2 bundles */
-DEFINE_VOID_FUNC1(set_itc,
-		  "mov r2 = " __stringify(XSI_BASE) " + "
-		  __stringify(XSI_ITC_LAST_OFS) "\n"
-		  "mov r3 = ar.itc\n"
-		  ";;\n"
-		  "sub r3 = r8, r3\n"
-		  "st8 [r2] = r8, "
-		  __stringify(XSI_ITC_LAST_OFS) " - "
-		  __stringify(XSI_ITC_OFFSET_OFS) "\n"
-		  ";;\n"
-		  "st8 [r2] = r3\n");
-
-/*
- * static unsigned long xen_get_itc(void)
- *	unsigned long res;
- *	unsigned long itc_offset;
- *	unsigned long itc_last;
- *	unsigned long ret_itc_last;
- *
- *	itc_offset = XEN_MAPPEDREGS->itc_offset;
- *	do {
- *		itc_last = XEN_MAPPEDREGS->itc_last;
- *		res = ia64_native_getreg(_IA64_REG_AR_ITC);
- *		res += itc_offset;
- *		if (itc_last >= res)
- *			res = itc_last + 1;
- *		ret_itc_last = cmpxchg(&XEN_MAPPEDREGS->itc_last,
- *				       itc_last, res);
- *	} while (unlikely(ret_itc_last != itc_last));
- *	return res;
- */
-/* 5 bundles */
-DEFINE_FUNC0(get_itc,
-	     "mov r2 = " __stringify(XSI_BASE) " + "
-	     __stringify(XSI_ITC_OFFSET_OFS) "\n"
-	     ";;\n"
-	     "ld8 r9 = [r2], " __stringify(XSI_ITC_LAST_OFS) " - "
-	     __stringify(XSI_ITC_OFFSET_OFS) "\n"
-					/* r9 = itc_offset */
-					/* r2 = XSI_ITC_OFFSET */
-	     "888:\n"
-	     "mov r8 = ar.itc\n"	/* res = ar.itc */
-	     ";;\n"
-	     "ld8 r3 = [r2]\n"		/* r3 = itc_last */
-	     "add r8 = r8, r9\n"	/* res = ar.itc + itc_offset */
-	     ";;\n"
-	     "cmp.gtu p6, p0 = r3, r8\n"
-	     ";;\n"
-	     "(p6) add r8 = 1, r3\n"	/* if (itc_last > res) itc_last + 1 */
-	     ";;\n"
-	     "mov ar.ccv = r8\n"
-	     ";;\n"
-	     "cmpxchg8.acq r10 = [r2], r8, ar.ccv\n"
-	     ";;\n"
-	     "cmp.ne p6, p0 = r10, r3\n"
-	     "(p6) hint @pause\n"
-	     "(p6) br.cond.spnt 888b\n");
-
-DEFINE_VOID_FUNC1_VOID(fc,
-		       "break " __stringify(HYPERPRIVOP_FC) "\n");
-
-/*
- * psr_i_addr_addr = XEN_PSR_I_ADDR_ADDR
- * masked_addr = *psr_i_addr_addr
- * pending_intr_addr = masked_addr - 1
- * if (val & IA64_PSR_I) {
- *   masked = *masked_addr
- *   *masked_addr = 0:xen_set_virtual_psr_i(1)
- *   compiler barrier
- *   if (masked) {
- *      uint8_t pending = *pending_intr_addr;
- *      if (pending)
- *              XEN_HYPER_SSM_I
- *   }
- * } else {
- *   *masked_addr = 1:xen_set_virtual_psr_i(0)
- * }
- */
-/* 6 bundles */
-DEFINE_VOID_FUNC1(intrin_local_irq_restore,
-		  /* r8 = input value: 0 or IA64_PSR_I
-		   * p6 =  (flags & IA64_PSR_I)
-		   *    = if clause
-		   * p7 = !(flags & IA64_PSR_I)
-		   *    = else clause
-		   */
-		  "cmp.ne p6, p7 = r8, r0\n"
-		  "mov r9 = " __stringify(XEN_PSR_I_ADDR_ADDR) "\n"
-		  ";;\n"
-		  /* r9 = XEN_PSR_I_ADDR */
-		  "ld8 r9 = [r9]\n"
-		  ";;\n"
-
-		  /* r10 = masked previous value */
-		  "(p6)	ld1.acq r10 = [r9]\n"
-		  ";;\n"
-
-		  /* p8 = !masked interrupt masked previously? */
-		  "(p6)	cmp.ne.unc p8, p0 = r10, r0\n"
-
-		  /* p7 = else clause */
-		  "(p7)	mov r11 = 1\n"
-		  ";;\n"
-		  /* masked = 1 */
-		  "(p7)	st1.rel [r9] = r11\n"
-
-		  /* p6 = if clause */
-		  /* masked = 0
-		   * r9 = masked_addr - 1
-		   *    = pending_intr_addr
-		   */
-		  "(p8)	st1.rel [r9] = r0, -1\n"
-		  ";;\n"
-		  /* r8 = pending_intr */
-		  "(p8)	ld1.acq r11 = [r9]\n"
-		  ";;\n"
-		  /* p9 = interrupt pending? */
-		  "(p8)	cmp.ne.unc p9, p10 = r11, r0\n"
-		  ";;\n"
-		  "(p10) mf\n"
-		  /* issue hypercall to trigger interrupt */
-		  "(p9)	break " __stringify(HYPERPRIVOP_SSM_I) "\n");
-
-DEFINE_VOID_FUNC2(ptcga,
-		  "break " __stringify(HYPERPRIVOP_PTC_GA) "\n");
-DEFINE_VOID_FUNC2(set_rr,
-		  "break " __stringify(HYPERPRIVOP_SET_RR) "\n");
-
-/*
- * tmp = XEN_MAPPEDREGS->interrupt_mask_addr = XEN_PSR_I_ADDR_ADDR;
- * tmp = *tmp
- * tmp = *tmp;
- * psr_i = tmp? 0: IA64_PSR_I;
- */
-/* 4 bundles */
-DEFINE_FUNC0(get_psr_i,
-	     "mov r9 = " __stringify(XEN_PSR_I_ADDR_ADDR) "\n"
-	     ";;\n"
-	     "ld8 r9 = [r9]\n"			/* r9 = XEN_PSR_I_ADDR */
-	     "mov r8 = 0\n"			/* psr_i = 0 */
-	     ";;\n"
-	     "ld1.acq r9 = [r9]\n"		/* r9 = XEN_PSR_I */
-	     ";;\n"
-	     "cmp.eq.unc p6, p0 = r9, r0\n"	/* p6 = (XEN_PSR_I != 0) */
-	     ";;\n"
-	     "(p6) mov r8 = " __stringify(1 << IA64_PSR_I_BIT) "\n");
-
-DEFINE_FUNC1(thash, unsigned long,
-	     "break " __stringify(HYPERPRIVOP_THASH) "\n");
-DEFINE_FUNC1(get_cpuid, int,
-	     "break " __stringify(HYPERPRIVOP_GET_CPUID) "\n");
-DEFINE_FUNC1(get_pmd, int,
-	     "break " __stringify(HYPERPRIVOP_GET_PMD) "\n");
-DEFINE_FUNC1(get_rr, unsigned long,
-	     "break " __stringify(HYPERPRIVOP_GET_RR) "\n");
-
-/*
- * void xen_privop_ssm_i(void)
- *
- * int masked = !xen_get_virtual_psr_i();
- *	// masked = *(*XEN_MAPPEDREGS->interrupt_mask_addr)
- * xen_set_virtual_psr_i(1)
- *	// *(*XEN_MAPPEDREGS->interrupt_mask_addr) = 0
- * // compiler barrier
- * if (masked) {
- *	uint8_t* pend_int_addr =
- *		(uint8_t*)(*XEN_MAPPEDREGS->interrupt_mask_addr) - 1;
- *	uint8_t pending = *pend_int_addr;
- *	if (pending)
- *		XEN_HYPER_SSM_I
- * }
- */
-/* 4 bundles */
-DEFINE_VOID_FUNC0(ssm_i,
-		  "mov r8 = " __stringify(XEN_PSR_I_ADDR_ADDR) "\n"
-		  ";;\n"
-		  "ld8 r8 = [r8]\n"		/* r8 = XEN_PSR_I_ADDR */
-		  ";;\n"
-		  "ld1.acq r9 = [r8]\n"		/* r9 = XEN_PSR_I */
-		  ";;\n"
-		  "st1.rel [r8] = r0, -1\n"	/* psr_i = 0. enable interrupt
-						 * r8 = XEN_PSR_I_ADDR - 1
-						 *    = pend_int_addr
-						 */
-		  "cmp.eq.unc p0, p6 = r9, r0\n"/* p6 = !XEN_PSR_I
-						 * previously interrupt
-						 * masked?
-						 */
-		  ";;\n"
-		  "(p6) ld1.acq r8 = [r8]\n"	/* r8 = xen_pend_int */
-		  ";;\n"
-		  "(p6) cmp.eq.unc p6, p7 = r8, r0\n"	/*interrupt pending?*/
-		  ";;\n"
-		  /* issue hypercall to get interrupt */
-		  "(p7) break " __stringify(HYPERPRIVOP_SSM_I) "\n"
-		  ";;\n");
-
-/*
- * psr_i_addr_addr = XEN_MAPPEDREGS->interrupt_mask_addr
- *		   = XEN_PSR_I_ADDR_ADDR;
- * psr_i_addr = *psr_i_addr_addr;
- * *psr_i_addr = 1;
- */
-/* 2 bundles */
-DEFINE_VOID_FUNC0(rsm_i,
-		  "mov r8 = " __stringify(XEN_PSR_I_ADDR_ADDR) "\n"
-						/* r8 = XEN_PSR_I_ADDR */
-		  "mov r9 = 1\n"
-		  ";;\n"
-		  "ld8 r8 = [r8]\n"		/* r8 = XEN_PSR_I */
-		  ";;\n"
-		  "st1.rel [r8] = r9\n");	/* XEN_PSR_I = 1 */
-
-extern void
-xen_set_rr0_to_rr4(unsigned long val0, unsigned long val1,
-		   unsigned long val2, unsigned long val3,
-		   unsigned long val4);
-__DEFINE_FUNC(set_rr0_to_rr4,
-	      "break " __stringify(HYPERPRIVOP_SET_RR0_TO_RR4) "\n");
-
-
-extern unsigned long xen_getreg(int regnum);
-#define __DEFINE_GET_REG(id, privop)					\
-	"mov r2 = " __stringify(_IA64_REG_ ## id) "\n"			\
-	";;\n"								\
-	"cmp.eq p6, p0 = r2, r8\n"					\
-	";;\n"								\
-	"(p6) break " __stringify(HYPERPRIVOP_GET_ ## privop) "\n"	\
-	"(p6) br.cond.sptk.many b6\n"					\
-	";;\n"
-
-__DEFINE_FUNC(getreg,
-	      __DEFINE_GET_REG(PSR, PSR)
-
-	      /* get_itc */
-	      "mov r2 = " __stringify(_IA64_REG_AR_ITC) "\n"
-	      ";;\n"
-	      "cmp.eq p6, p0 = r2, r8\n"
-	      ";;\n"
-	      "(p6) br.cond.spnt xen_get_itc\n"
-	      ";;\n"
-
-	      /* get itm */
-	      "mov r2 = " __stringify(_IA64_REG_CR_ITM) "\n"
-	      ";;\n"
-	      "cmp.eq p6, p0 = r2, r8\n"
-	      ";;\n"
-	      "(p6) br.cond.spnt xen_get_itm_with_offset\n"
-	      ";;\n"
-
-	      __DEFINE_GET_REG(CR_IVR, IVR)
-	      __DEFINE_GET_REG(CR_TPR, TPR)
-
-	      /* fall back */
-	      "movl r2 = ia64_native_getreg_func\n"
-	      ";;\n"
-	      "mov b7 = r2\n"
-	      ";;\n"
-	      "br.cond.sptk.many b7\n");
-
-extern void xen_setreg(int regnum, unsigned long val);
-#define __DEFINE_SET_REG(id, privop)					\
-	"mov r2 = " __stringify(_IA64_REG_ ## id) "\n"			\
-	";;\n"								\
-	"cmp.eq p6, p0 = r2, r9\n"					\
-	";;\n"								\
-	"(p6) break " __stringify(HYPERPRIVOP_ ## privop) "\n"		\
-	"(p6) br.cond.sptk.many b6\n"					\
-	";;\n"
-
-__DEFINE_FUNC(setreg,
-	      /* kr0 .. kr 7*/
-	      /*
-	       * if (_IA64_REG_AR_KR0 <= regnum &&
-	       *     regnum <= _IA64_REG_AR_KR7) {
-	       *     register __index asm ("r8") = regnum - _IA64_REG_AR_KR0
-	       *     register __val asm ("r9") = val
-	       *    "break HYPERPRIVOP_SET_KR"
-	       * }
-	       */
-	      "mov r17 = r9\n"
-	      "mov r2 = " __stringify(_IA64_REG_AR_KR0) "\n"
-	      ";;\n"
-	      "cmp.ge p6, p0 = r9, r2\n"
-	      "sub r17 = r17, r2\n"
-	      ";;\n"
-	      "(p6) cmp.ge.unc p7, p0 = "
-	      __stringify(_IA64_REG_AR_KR7) " - " __stringify(_IA64_REG_AR_KR0)
-	      ", r17\n"
-	      ";;\n"
-	      "(p7) mov r9 = r8\n"
-	      ";;\n"
-	      "(p7) mov r8 = r17\n"
-	      "(p7) break " __stringify(HYPERPRIVOP_SET_KR) "\n"
-
-	      /* set itm */
-	      "mov r2 = " __stringify(_IA64_REG_CR_ITM) "\n"
-	      ";;\n"
-	      "cmp.eq p6, p0 = r2, r8\n"
-	      ";;\n"
-	      "(p6) br.cond.spnt xen_set_itm_with_offset\n"
-
-	      /* set itc */
-	      "mov r2 = " __stringify(_IA64_REG_AR_ITC) "\n"
-	      ";;\n"
-	      "cmp.eq p6, p0 = r2, r8\n"
-	      ";;\n"
-	      "(p6) br.cond.spnt xen_set_itc\n"
-
-	      __DEFINE_SET_REG(CR_TPR, SET_TPR)
-	      __DEFINE_SET_REG(CR_EOI, EOI)
-
-	      /* fall back */
-	      "movl r2 = ia64_native_setreg_func\n"
-	      ";;\n"
-	      "mov b7 = r2\n"
-	      ";;\n"
-	      "br.cond.sptk.many b7\n");
-#endif
-
-static const struct pv_cpu_ops xen_cpu_ops __initconst = {
-	.fc		= xen_fc,
-	.thash		= xen_thash,
-	.get_cpuid	= xen_get_cpuid,
-	.get_pmd	= xen_get_pmd,
-	.getreg		= xen_getreg,
-	.setreg		= xen_setreg,
-	.ptcga		= xen_ptcga,
-	.get_rr		= xen_get_rr,
-	.set_rr		= xen_set_rr,
-	.set_rr0_to_rr4	= xen_set_rr0_to_rr4,
-	.ssm_i		= xen_ssm_i,
-	.rsm_i		= xen_rsm_i,
-	.get_psr_i	= xen_get_psr_i,
-	.intrin_local_irq_restore
-			= xen_intrin_local_irq_restore,
-};
-
-/******************************************************************************
- * replacement of hand written assembly codes.
- */
-
-extern char xen_switch_to;
-extern char xen_leave_syscall;
-extern char xen_work_processed_syscall;
-extern char xen_leave_kernel;
-
-const struct pv_cpu_asm_switch xen_cpu_asm_switch = {
-	.switch_to		= (unsigned long)&xen_switch_to,
-	.leave_syscall		= (unsigned long)&xen_leave_syscall,
-	.work_processed_syscall	= (unsigned long)&xen_work_processed_syscall,
-	.leave_kernel		= (unsigned long)&xen_leave_kernel,
-};
-
-/***************************************************************************
- * pv_iosapic_ops
- * iosapic read/write hooks.
- */
-static void
-xen_pcat_compat_init(void)
-{
-	/* nothing */
-}
-
-static struct irq_chip*
-xen_iosapic_get_irq_chip(unsigned long trigger)
-{
-	return NULL;
-}
-
-static unsigned int
-xen_iosapic_read(char __iomem *iosapic, unsigned int reg)
-{
-	struct physdev_apic apic_op;
-	int ret;
-
-	apic_op.apic_physbase = (unsigned long)iosapic -
-					__IA64_UNCACHED_OFFSET;
-	apic_op.reg = reg;
-	ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
-	if (ret)
-		return ret;
-	return apic_op.value;
-}
-
-static void
-xen_iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val)
-{
-	struct physdev_apic apic_op;
-
-	apic_op.apic_physbase = (unsigned long)iosapic -
-					__IA64_UNCACHED_OFFSET;
-	apic_op.reg = reg;
-	apic_op.value = val;
-	HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op);
-}
-
-static struct pv_iosapic_ops xen_iosapic_ops __initdata = {
-	.pcat_compat_init = xen_pcat_compat_init,
-	.__get_irq_chip = xen_iosapic_get_irq_chip,
-
-	.__read = xen_iosapic_read,
-	.__write = xen_iosapic_write,
-};
-
-/***************************************************************************
- * pv_ops initialization
- */
-
-void __init
-xen_setup_pv_ops(void)
-{
-	xen_info_init();
-	pv_info = xen_info;
-	pv_init_ops = xen_init_ops;
-	pv_fsys_data = xen_fsys_data;
-	pv_patchdata = xen_patchdata;
-	pv_cpu_ops = xen_cpu_ops;
-	pv_iosapic_ops = xen_iosapic_ops;
-	pv_irq_ops = xen_irq_ops;
-	pv_time_ops = xen_time_ops;
-
-	paravirt_cpu_asm_init(&xen_cpu_asm_switch);
-}
-
-#ifdef ASM_SUPPORTED
-/***************************************************************************
- * binary pacthing
- * pv_init_ops.patch_bundle
- */
-
-#define DEFINE_FUNC_GETREG(name, privop)				\
-	DEFINE_FUNC0(get_ ## name,					\
-		     "break "__stringify(HYPERPRIVOP_GET_ ## privop) "\n")
-
-DEFINE_FUNC_GETREG(psr, PSR);
-DEFINE_FUNC_GETREG(eflag, EFLAG);
-DEFINE_FUNC_GETREG(ivr, IVR);
-DEFINE_FUNC_GETREG(tpr, TPR);
-
-#define DEFINE_FUNC_SET_KR(n)						\
-	DEFINE_VOID_FUNC0(set_kr ## n,					\
-			  ";;\n"					\
-			  "mov r9 = r8\n"				\
-			  "mov r8 = " #n "\n"				\
-			  "break " __stringify(HYPERPRIVOP_SET_KR) "\n")
-
-DEFINE_FUNC_SET_KR(0);
-DEFINE_FUNC_SET_KR(1);
-DEFINE_FUNC_SET_KR(2);
-DEFINE_FUNC_SET_KR(3);
-DEFINE_FUNC_SET_KR(4);
-DEFINE_FUNC_SET_KR(5);
-DEFINE_FUNC_SET_KR(6);
-DEFINE_FUNC_SET_KR(7);
-
-#define __DEFINE_FUNC_SETREG(name, privop)				\
-	DEFINE_VOID_FUNC0(name,						\
-			  "break "__stringify(HYPERPRIVOP_ ## privop) "\n")
-
-#define DEFINE_FUNC_SETREG(name, privop)			\
-	__DEFINE_FUNC_SETREG(set_ ## name, SET_ ## privop)
-
-DEFINE_FUNC_SETREG(eflag, EFLAG);
-DEFINE_FUNC_SETREG(tpr, TPR);
-__DEFINE_FUNC_SETREG(eoi, EOI);
-
-extern const char xen_check_events[];
-extern const char __xen_intrin_local_irq_restore_direct_start[];
-extern const char __xen_intrin_local_irq_restore_direct_end[];
-extern const unsigned long __xen_intrin_local_irq_restore_direct_reloc;
-
-asm (
-	".align 32\n"
-	".proc xen_check_events\n"
-	"xen_check_events:\n"
-	/* masked = 0
-	 * r9 = masked_addr - 1
-	 *    = pending_intr_addr
-	 */
-	"st1.rel [r9] = r0, -1\n"
-	";;\n"
-	/* r8 = pending_intr */
-	"ld1.acq r11 = [r9]\n"
-	";;\n"
-	/* p9 = interrupt pending? */
-	"cmp.ne p9, p10 = r11, r0\n"
-	";;\n"
-	"(p10) mf\n"
-	/* issue hypercall to trigger interrupt */
-	"(p9) break " __stringify(HYPERPRIVOP_SSM_I) "\n"
-	"br.cond.sptk.many b6\n"
-	".endp xen_check_events\n"
-	"\n"
-	".align 32\n"
-	".proc __xen_intrin_local_irq_restore_direct\n"
-	"__xen_intrin_local_irq_restore_direct:\n"
-	"__xen_intrin_local_irq_restore_direct_start:\n"
-	"1:\n"
-	"{\n"
-	"cmp.ne p6, p7 = r8, r0\n"
-	"mov r17 = ip\n" /* get ip to calc return address */
-	"mov r9 = "__stringify(XEN_PSR_I_ADDR_ADDR) "\n"
-	";;\n"
-	"}\n"
-	"{\n"
-	/* r9 = XEN_PSR_I_ADDR */
-	"ld8 r9 = [r9]\n"
-	";;\n"
-	/* r10 = masked previous value */
-	"(p6) ld1.acq r10 = [r9]\n"
-	"adds r17 =  1f - 1b, r17\n" /* calculate return address */
-	";;\n"
-	"}\n"
-	"{\n"
-	/* p8 = !masked interrupt masked previously? */
-	"(p6) cmp.ne.unc p8, p0 = r10, r0\n"
-	"\n"
-	/* p7 = else clause */
-	"(p7) mov r11 = 1\n"
-	";;\n"
-	"(p8) mov b6 = r17\n" /* set return address */
-	"}\n"
-	"{\n"
-	/* masked = 1 */
-	"(p7) st1.rel [r9] = r11\n"
-	"\n"
-	"[99:]\n"
-	"(p8) brl.cond.dptk.few xen_check_events\n"
-	"}\n"
-	/* pv calling stub is 5 bundles. fill nop to adjust return address */
-	"{\n"
-	"nop 0\n"
-	"nop 0\n"
-	"nop 0\n"
-	"}\n"
-	"1:\n"
-	"__xen_intrin_local_irq_restore_direct_end:\n"
-	".endp __xen_intrin_local_irq_restore_direct\n"
-	"\n"
-	".align 8\n"
-	"__xen_intrin_local_irq_restore_direct_reloc:\n"
-	"data8 99b\n"
-);
-
-static struct paravirt_patch_bundle_elem xen_patch_bundle_elems[]
-__initdata_or_module =
-{
-#define XEN_PATCH_BUNDLE_ELEM(name, type)		\
-	{						\
-		(void*)xen_ ## name ## _direct_start,	\
-		(void*)xen_ ## name ## _direct_end,	\
-		PARAVIRT_PATCH_TYPE_ ## type,		\
-	}
-
-	XEN_PATCH_BUNDLE_ELEM(fc, FC),
-	XEN_PATCH_BUNDLE_ELEM(thash, THASH),
-	XEN_PATCH_BUNDLE_ELEM(get_cpuid, GET_CPUID),
-	XEN_PATCH_BUNDLE_ELEM(get_pmd, GET_PMD),
-	XEN_PATCH_BUNDLE_ELEM(ptcga, PTCGA),
-	XEN_PATCH_BUNDLE_ELEM(get_rr, GET_RR),
-	XEN_PATCH_BUNDLE_ELEM(set_rr, SET_RR),
-	XEN_PATCH_BUNDLE_ELEM(set_rr0_to_rr4, SET_RR0_TO_RR4),
-	XEN_PATCH_BUNDLE_ELEM(ssm_i, SSM_I),
-	XEN_PATCH_BUNDLE_ELEM(rsm_i, RSM_I),
-	XEN_PATCH_BUNDLE_ELEM(get_psr_i, GET_PSR_I),
-	{
-		(void*)__xen_intrin_local_irq_restore_direct_start,
-		(void*)__xen_intrin_local_irq_restore_direct_end,
-		PARAVIRT_PATCH_TYPE_INTRIN_LOCAL_IRQ_RESTORE,
-	},
-
-#define XEN_PATCH_BUNDLE_ELEM_GETREG(name, reg)			\
-	{							\
-		xen_get_ ## name ## _direct_start,		\
-		xen_get_ ## name ## _direct_end,		\
-		PARAVIRT_PATCH_TYPE_GETREG + _IA64_REG_ ## reg, \
-	}
-
-	XEN_PATCH_BUNDLE_ELEM_GETREG(psr, PSR),
-	XEN_PATCH_BUNDLE_ELEM_GETREG(eflag, AR_EFLAG),
-
-	XEN_PATCH_BUNDLE_ELEM_GETREG(ivr, CR_IVR),
-	XEN_PATCH_BUNDLE_ELEM_GETREG(tpr, CR_TPR),
-
-	XEN_PATCH_BUNDLE_ELEM_GETREG(itc, AR_ITC),
-	XEN_PATCH_BUNDLE_ELEM_GETREG(itm_with_offset, CR_ITM),
-
-
-#define __XEN_PATCH_BUNDLE_ELEM_SETREG(name, reg)		\
-	{							\
-		xen_ ## name ## _direct_start,			\
-		xen_ ## name ## _direct_end,			\
-		PARAVIRT_PATCH_TYPE_SETREG + _IA64_REG_ ## reg, \
-	}
-
-#define XEN_PATCH_BUNDLE_ELEM_SETREG(name, reg)			\
-	__XEN_PATCH_BUNDLE_ELEM_SETREG(set_ ## name, reg)
-
-	XEN_PATCH_BUNDLE_ELEM_SETREG(kr0, AR_KR0),
-	XEN_PATCH_BUNDLE_ELEM_SETREG(kr1, AR_KR1),
-	XEN_PATCH_BUNDLE_ELEM_SETREG(kr2, AR_KR2),
-	XEN_PATCH_BUNDLE_ELEM_SETREG(kr3, AR_KR3),
-	XEN_PATCH_BUNDLE_ELEM_SETREG(kr4, AR_KR4),
-	XEN_PATCH_BUNDLE_ELEM_SETREG(kr5, AR_KR5),
-	XEN_PATCH_BUNDLE_ELEM_SETREG(kr6, AR_KR6),
-	XEN_PATCH_BUNDLE_ELEM_SETREG(kr7, AR_KR7),
-
-	XEN_PATCH_BUNDLE_ELEM_SETREG(eflag, AR_EFLAG),
-	XEN_PATCH_BUNDLE_ELEM_SETREG(tpr, CR_TPR),
-	__XEN_PATCH_BUNDLE_ELEM_SETREG(eoi, CR_EOI),
-
-	XEN_PATCH_BUNDLE_ELEM_SETREG(itc, AR_ITC),
-	XEN_PATCH_BUNDLE_ELEM_SETREG(itm_with_offset, CR_ITM),
-};
-
-static unsigned long __init_or_module
-xen_patch_bundle(void *sbundle, void *ebundle, unsigned long type)
-{
-	const unsigned long nelems = sizeof(xen_patch_bundle_elems) /
-		sizeof(xen_patch_bundle_elems[0]);
-	unsigned long used;
-	const struct paravirt_patch_bundle_elem *found;
-
-	used = __paravirt_patch_apply_bundle(sbundle, ebundle, type,
-					     xen_patch_bundle_elems, nelems,
-					     &found);
-
-	if (found == NULL)
-		/* fallback */
-		return ia64_native_patch_bundle(sbundle, ebundle, type);
-	if (used == 0)
-		return used;
-
-	/* relocation */
-	switch (type) {
-	case PARAVIRT_PATCH_TYPE_INTRIN_LOCAL_IRQ_RESTORE: {
-		unsigned long reloc =
-			__xen_intrin_local_irq_restore_direct_reloc;
-		unsigned long reloc_offset = reloc - (unsigned long)
-			__xen_intrin_local_irq_restore_direct_start;
-		unsigned long tag = (unsigned long)sbundle + reloc_offset;
-		paravirt_patch_reloc_brl(tag, xen_check_events);
-		break;
-	}
-	default:
-		/* nothing */
-		break;
-	}
-	return used;
-}
-#endif /* ASM_SUPPOTED */
-
-const struct paravirt_patch_branch_target xen_branch_target[]
-__initconst = {
-#define PARAVIRT_BR_TARGET(name, type)			\
-	{						\
-		&xen_ ## name,				\
-		PARAVIRT_PATCH_TYPE_BR_ ## type,	\
-	}
-	PARAVIRT_BR_TARGET(switch_to, SWITCH_TO),
-	PARAVIRT_BR_TARGET(leave_syscall, LEAVE_SYSCALL),
-	PARAVIRT_BR_TARGET(work_processed_syscall, WORK_PROCESSED_SYSCALL),
-	PARAVIRT_BR_TARGET(leave_kernel, LEAVE_KERNEL),
-};
-
-static void __init
-xen_patch_branch(unsigned long tag, unsigned long type)
-{
-	__paravirt_patch_apply_branch(tag, type, xen_branch_target,
-					ARRAY_SIZE(xen_branch_target));
-}
diff --git a/arch/ia64/xen/xencomm.c b/arch/ia64/xen/xencomm.c
deleted file mode 100644
index 73d903ca2d64..000000000000
--- a/arch/ia64/xen/xencomm.c
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (C) 2006 Hollis Blanchard <hollisb@us.ibm.com>, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- */
-
-#include <linux/mm.h>
-#include <linux/err.h>
-
-static unsigned long kernel_virtual_offset;
-static int is_xencomm_initialized;
-
-/* for xen early printk. It uses console io hypercall which uses xencomm.
- * However early printk may use it before xencomm initialization.
- */
-int
-xencomm_is_initialized(void)
-{
-	return is_xencomm_initialized;
-}
-
-void
-xencomm_initialize(void)
-{
-	kernel_virtual_offset = KERNEL_START - ia64_tpa(KERNEL_START);
-	is_xencomm_initialized = 1;
-}
-
-/* Translate virtual address to physical address.  */
-unsigned long
-xencomm_vtop(unsigned long vaddr)
-{
-	struct page *page;
-	struct vm_area_struct *vma;
-
-	if (vaddr == 0)
-		return 0UL;
-
-	if (REGION_NUMBER(vaddr) == 5) {
-		pgd_t *pgd;
-		pud_t *pud;
-		pmd_t *pmd;
-		pte_t *ptep;
-
-		/* On ia64, TASK_SIZE refers to current.  It is not initialized
-		   during boot.
-		   Furthermore the kernel is relocatable and __pa() doesn't
-		   work on  addresses.  */
-		if (vaddr >= KERNEL_START
-		    && vaddr < (KERNEL_START + KERNEL_TR_PAGE_SIZE))
-			return vaddr - kernel_virtual_offset;
-
-		/* In kernel area -- virtually mapped.  */
-		pgd = pgd_offset_k(vaddr);
-		if (pgd_none(*pgd) || pgd_bad(*pgd))
-			return ~0UL;
-
-		pud = pud_offset(pgd, vaddr);
-		if (pud_none(*pud) || pud_bad(*pud))
-			return ~0UL;
-
-		pmd = pmd_offset(pud, vaddr);
-		if (pmd_none(*pmd) || pmd_bad(*pmd))
-			return ~0UL;
-
-		ptep = pte_offset_kernel(pmd, vaddr);
-		if (!ptep)
-			return ~0UL;
-
-		return (pte_val(*ptep) & _PFN_MASK) | (vaddr & ~PAGE_MASK);
-	}
-
-	if (vaddr > TASK_SIZE) {
-		/* percpu variables */
-		if (REGION_NUMBER(vaddr) == 7 &&
-		    REGION_OFFSET(vaddr) >= (1ULL << IA64_MAX_PHYS_BITS))
-			ia64_tpa(vaddr);
-
-		/* kernel address */
-		return __pa(vaddr);
-	}
-
-	/* XXX double-check (lack of) locking */
-	vma = find_extend_vma(current->mm, vaddr);
-	if (!vma)
-		return ~0UL;
-
-	/* We assume the page is modified.  */
-	page = follow_page(vma, vaddr, FOLL_WRITE | FOLL_TOUCH);
-	if (IS_ERR_OR_NULL(page))
-		return ~0UL;
-
-	return (page_to_pfn(page) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK);
-}
diff --git a/arch/ia64/xen/xenivt.S b/arch/ia64/xen/xenivt.S
deleted file mode 100644
index 3e71d50584d9..000000000000
--- a/arch/ia64/xen/xenivt.S
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * arch/ia64/xen/ivt.S
- *
- * Copyright (C) 2005 Hewlett-Packard Co
- *	Dan Magenheimer <dan.magenheimer@hp.com>
- *
- * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
- *                    VA Linux Systems Japan K.K.
- *                    pv_ops.
- */
-
-#include <asm/asmmacro.h>
-#include <asm/kregs.h>
-#include <asm/pgtable.h>
-
-#include "../kernel/minstate.h"
-
-	.section .text,"ax"
-GLOBAL_ENTRY(xen_event_callback)
-	mov r31=pr		// prepare to save predicates
-	;;
-	SAVE_MIN_WITH_COVER	// uses r31; defines r2 and r3
-	;;
-	movl r3=XSI_PSR_IC
-	mov r14=1
-	;;
-	st4 [r3]=r14
-	;;
-	adds r3=8,r2		// set up second base pointer for SAVE_REST
-	srlz.i			// ensure everybody knows psr.ic is back on
-	;;
-	SAVE_REST
-	;;
-1:
-	alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
-	add out0=16,sp		// pass pointer to pt_regs as first arg
-	;;
-	br.call.sptk.many b0=xen_evtchn_do_upcall
-	;;
-	movl r20=XSI_PSR_I_ADDR
-	;;
-	ld8 r20=[r20]
-	;;
-	adds r20=-1,r20		// vcpu_info->evtchn_upcall_pending
-	;;
-	ld1 r20=[r20]
-	;;
-	cmp.ne p6,p0=r20,r0	// if there are pending events,
-	(p6) br.spnt.few 1b	// call evtchn_do_upcall again.
-	br.sptk.many xen_leave_kernel	// we know ia64_leave_kernel is
-					// paravirtualized as xen_leave_kernel
-END(xen_event_callback)
diff --git a/arch/ia64/xen/xensetup.S b/arch/ia64/xen/xensetup.S
deleted file mode 100644
index e29519ebe2d2..000000000000
--- a/arch/ia64/xen/xensetup.S
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Support routines for Xen
- *
- * Copyright (C) 2005 Dan Magenheimer <dan.magenheimer@hp.com>
- */
-
-#include <asm/processor.h>
-#include <asm/asmmacro.h>
-#include <asm/pgtable.h>
-#include <asm/paravirt.h>
-#include <asm/xen/privop.h>
-#include <linux/elfnote.h>
-#include <linux/init.h>
-#include <xen/interface/elfnote.h>
-
-	.section .data..read_mostly
-	.align 8
-	.global xen_domain_type
-xen_domain_type:
-	data4 XEN_NATIVE_ASM
-	.previous
-
-	__INIT
-ENTRY(startup_xen)
-	// Calculate load offset.
-	// The constant, LOAD_OFFSET, can't be used because the boot
-	// loader doesn't always load to the LMA specified by the vmlinux.lds.
-	mov r9=ip	// must be the first instruction to make sure
-			// that r9 = the physical address of startup_xen.
-			// Usually r9 = startup_xen - LOAD_OFFSET
-	movl r8=startup_xen
-	;;
-	sub r9=r9,r8	// Usually r9 = -LOAD_OFFSET.
-
-	mov r10=PARAVIRT_HYPERVISOR_TYPE_XEN
-	movl r11=_start
-	;;
-	add r11=r11,r9
-	movl r8=hypervisor_type
-	;;
-	add r8=r8,r9
-	mov b0=r11
-	;;
-	st8 [r8]=r10
-	br.cond.sptk.many b0
-	;;
-END(startup_xen)
-
-	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,	.asciz "linux")
-	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION,	.asciz "2.6")
-	ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION,	.asciz "xen-3.0")
-	ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,		data8.ua startup_xen - LOAD_OFFSET)
-
-#define isBP	p3	// are we the Bootstrap Processor?
-
-GLOBAL_ENTRY(xen_setup_hook)
-	mov r8=XEN_PV_DOMAIN_ASM
-(isBP)	movl r9=xen_domain_type;;
-(isBP)	st4 [r9]=r8
-	movl r10=xen_ivt;;
-
-	mov cr.iva=r10
-
-	/* Set xsi base.  */
-#define FW_HYPERCALL_SET_SHARED_INFO_VA			0x600
-(isBP)	mov r2=FW_HYPERCALL_SET_SHARED_INFO_VA
-(isBP)	movl r28=XSI_BASE;;
-(isBP)	break 0x1000;;
-
-	/* setup pv_ops */
-(isBP)	mov r4=rp
-	;;
-(isBP)	br.call.sptk.many rp=xen_setup_pv_ops
-	;;
-(isBP)	mov rp=r4
-	;;
-
-	br.ret.sptk.many rp
-	;;
-END(xen_setup_hook)
diff --git a/include/xen/interface/callback.h b/include/xen/interface/callback.h
index 8c5fa0e20155..dc3193f4b581 100644
--- a/include/xen/interface/callback.h
+++ b/include/xen/interface/callback.h
@@ -36,7 +36,7 @@
  * @extra_args == Operation-specific extra arguments (NULL if none).
  */
 
-/* ia64, x86: Callback for event delivery. */
+/* x86: Callback for event delivery. */
 #define CALLBACKTYPE_event                 0
 
 /* x86: Failsafe callback when guest state cannot be restored by Xen. */
diff --git a/include/xen/interface/io/protocols.h b/include/xen/interface/io/protocols.h
index 056744b4b05e..545a14ba0bb3 100644
--- a/include/xen/interface/io/protocols.h
+++ b/include/xen/interface/io/protocols.h
@@ -3,7 +3,6 @@
 
 #define XEN_IO_PROTO_ABI_X86_32     "x86_32-abi"
 #define XEN_IO_PROTO_ABI_X86_64     "x86_64-abi"
-#define XEN_IO_PROTO_ABI_IA64       "ia64-abi"
 #define XEN_IO_PROTO_ABI_POWERPC64  "powerpc64-abi"
 #define XEN_IO_PROTO_ABI_ARM        "arm-abi"
 
@@ -11,8 +10,6 @@
 # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32
 #elif defined(__x86_64__)
 # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64
-#elif defined(__ia64__)
-# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_IA64
 #elif defined(__powerpc64__)
 # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_POWERPC64
 #elif defined(__arm__) || defined(__aarch64__)
-- 
cgit v1.2.3


From 67905540e8b8eaf51e621cfd2ef15641d6d5b9a7 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Thu, 7 Nov 2013 12:01:48 +0100
Subject: clocksource: Add Allwinner SoCs HS timers driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Most of the Allwinner SoCs (at this time, all but the A10) also have a
High Speed timers that are not using the 24MHz oscillator as a source
but rather the AHB clock running much faster.

The IP is slightly different between the A10s/A13 and the one used in
the A20/A31, since the latter have 4 timers available, while the former
have only 2 of them.

[dlezcano] : Fixed conflict with b788beda "Order Kconfig options
		alphabetically"

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Tested-by: Emilio López <emilio@elopez.com.ar>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 .../bindings/timer/allwinner,sun5i-a13-hstimer.txt |  22 +++
 arch/arm/mach-sunxi/Kconfig                        |   1 +
 drivers/clocksource/Kconfig                        |   4 +
 drivers/clocksource/Makefile                       |   1 +
 drivers/clocksource/timer-sun5i.c                  | 192 +++++++++++++++++++++
 5 files changed, 220 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.txt
 create mode 100644 drivers/clocksource/timer-sun5i.c

(limited to 'arch')

diff --git a/Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.txt b/Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.txt
new file mode 100644
index 000000000000..7c26154b8bbb
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.txt
@@ -0,0 +1,22 @@
+Allwinner SoCs High Speed Timer Controller
+
+Required properties:
+
+- compatible :	should be "allwinner,sun5i-a13-hstimer" or
+		"allwinner,sun7i-a20-hstimer"
+- reg : Specifies base physical address and size of the registers.
+- interrupts :	The interrupts of these timers (2 for the sun5i IP, 4 for the sun7i
+		one)
+- clocks: phandle to the source clock (usually the AHB clock)
+
+Example:
+
+timer@01c60000 {
+	compatible = "allwinner,sun7i-a20-hstimer";
+	reg = <0x01c60000 0x1000>;
+	interrupts = <0 51 1>,
+		     <0 52 1>,
+		     <0 53 1>,
+		     <0 54 1>;
+	clocks = <&ahb1_gates 19>;
+};
diff --git a/arch/arm/mach-sunxi/Kconfig b/arch/arm/mach-sunxi/Kconfig
index c9e72c89066a..bce0d4277f71 100644
--- a/arch/arm/mach-sunxi/Kconfig
+++ b/arch/arm/mach-sunxi/Kconfig
@@ -12,3 +12,4 @@ config ARCH_SUNXI
 	select PINCTRL_SUNXI
 	select SPARSE_IRQ
 	select SUN4I_TIMER
+	select SUN5I_HSTIMER
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index bdb953e15d2a..884eeff8e32d 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -37,6 +37,10 @@ config SUN4I_TIMER
 	select CLKSRC_MMIO
 	bool
 
+config SUN5I_HSTIMER
+	select CLKSRC_MMIO
+	bool
+
 config VT8500_TIMER
 	bool
 
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 33621efb9148..358358d87b6d 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_ARCH_MOXART)	+= moxart_timer.o
 obj-$(CONFIG_ARCH_MXS)		+= mxs_timer.o
 obj-$(CONFIG_ARCH_PRIMA2)	+= timer-prima2.o
 obj-$(CONFIG_SUN4I_TIMER)	+= sun4i_timer.o
+obj-$(CONFIG_SUN5I_HSTIMER)	+= timer-sun5i.o
 obj-$(CONFIG_ARCH_TEGRA)	+= tegra20_timer.o
 obj-$(CONFIG_VT8500_TIMER)	+= vt8500_timer.o
 obj-$(CONFIG_ARCH_NSPIRE)	+= zevio-timer.o
diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c
new file mode 100644
index 000000000000..bddc52233d2a
--- /dev/null
+++ b/drivers/clocksource/timer-sun5i.c
@@ -0,0 +1,192 @@
+/*
+ * Allwinner SoCs hstimer driver.
+ *
+ * Copyright (C) 2013 Maxime Ripard
+ *
+ * Maxime Ripard <maxime.ripard@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/clk.h>
+#include <linux/clockchips.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqreturn.h>
+#include <linux/sched_clock.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#define TIMER_IRQ_EN_REG		0x00
+#define TIMER_IRQ_EN(val)			BIT(val)
+#define TIMER_IRQ_ST_REG		0x04
+#define TIMER_CTL_REG(val)		(0x20 * (val) + 0x10)
+#define TIMER_CTL_ENABLE			BIT(0)
+#define TIMER_CTL_RELOAD			BIT(1)
+#define TIMER_CTL_CLK_PRES(val)			(((val) & 0x7) << 4)
+#define TIMER_CTL_ONESHOT			BIT(7)
+#define TIMER_INTVAL_LO_REG(val)	(0x20 * (val) + 0x14)
+#define TIMER_INTVAL_HI_REG(val)	(0x20 * (val) + 0x18)
+#define TIMER_CNTVAL_LO_REG(val)	(0x20 * (val) + 0x1c)
+#define TIMER_CNTVAL_HI_REG(val)	(0x20 * (val) + 0x20)
+
+#define TIMER_SYNC_TICKS	3
+
+static void __iomem *timer_base;
+static u32 ticks_per_jiffy;
+
+/*
+ * When we disable a timer, we need to wait at least for 2 cycles of
+ * the timer source clock. We will use for that the clocksource timer
+ * that is already setup and runs at the same frequency than the other
+ * timers, and we never will be disabled.
+ */
+static void sun5i_clkevt_sync(void)
+{
+	u32 old = readl(timer_base + TIMER_CNTVAL_LO_REG(1));
+
+	while ((old - readl(timer_base + TIMER_CNTVAL_LO_REG(1))) < TIMER_SYNC_TICKS)
+		cpu_relax();
+}
+
+static void sun5i_clkevt_time_stop(u8 timer)
+{
+	u32 val = readl(timer_base + TIMER_CTL_REG(timer));
+	writel(val & ~TIMER_CTL_ENABLE, timer_base + TIMER_CTL_REG(timer));
+
+	sun5i_clkevt_sync();
+}
+
+static void sun5i_clkevt_time_setup(u8 timer, u32 delay)
+{
+	writel(delay, timer_base + TIMER_INTVAL_LO_REG(timer));
+}
+
+static void sun5i_clkevt_time_start(u8 timer, bool periodic)
+{
+	u32 val = readl(timer_base + TIMER_CTL_REG(timer));
+
+	if (periodic)
+		val &= ~TIMER_CTL_ONESHOT;
+	else
+		val |= TIMER_CTL_ONESHOT;
+
+	writel(val | TIMER_CTL_ENABLE | TIMER_CTL_RELOAD,
+	       timer_base + TIMER_CTL_REG(timer));
+}
+
+static void sun5i_clkevt_mode(enum clock_event_mode mode,
+			      struct clock_event_device *clk)
+{
+	switch (mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+		sun5i_clkevt_time_stop(0);
+		sun5i_clkevt_time_setup(0, ticks_per_jiffy);
+		sun5i_clkevt_time_start(0, true);
+		break;
+	case CLOCK_EVT_MODE_ONESHOT:
+		sun5i_clkevt_time_stop(0);
+		sun5i_clkevt_time_start(0, false);
+		break;
+	case CLOCK_EVT_MODE_UNUSED:
+	case CLOCK_EVT_MODE_SHUTDOWN:
+	default:
+		sun5i_clkevt_time_stop(0);
+		break;
+	}
+}
+
+static int sun5i_clkevt_next_event(unsigned long evt,
+				   struct clock_event_device *unused)
+{
+	sun5i_clkevt_time_stop(0);
+	sun5i_clkevt_time_setup(0, evt - TIMER_SYNC_TICKS);
+	sun5i_clkevt_time_start(0, false);
+
+	return 0;
+}
+
+static struct clock_event_device sun5i_clockevent = {
+	.name = "sun5i_tick",
+	.rating = 340,
+	.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
+	.set_mode = sun5i_clkevt_mode,
+	.set_next_event = sun5i_clkevt_next_event,
+};
+
+
+static irqreturn_t sun5i_timer_interrupt(int irq, void *dev_id)
+{
+	struct clock_event_device *evt = (struct clock_event_device *)dev_id;
+
+	writel(0x1, timer_base + TIMER_IRQ_ST_REG);
+	evt->event_handler(evt);
+
+	return IRQ_HANDLED;
+}
+
+static struct irqaction sun5i_timer_irq = {
+	.name = "sun5i_timer0",
+	.flags = IRQF_TIMER | IRQF_IRQPOLL,
+	.handler = sun5i_timer_interrupt,
+	.dev_id = &sun5i_clockevent,
+};
+
+static u32 sun5i_timer_sched_read(void)
+{
+	return ~readl(timer_base + TIMER_CNTVAL_LO_REG(1));
+}
+
+static void __init sun5i_timer_init(struct device_node *node)
+{
+	unsigned long rate;
+	struct clk *clk;
+	int ret, irq;
+	u32 val;
+
+	timer_base = of_iomap(node, 0);
+	if (!timer_base)
+		panic("Can't map registers");
+
+	irq = irq_of_parse_and_map(node, 0);
+	if (irq <= 0)
+		panic("Can't parse IRQ");
+
+	clk = of_clk_get(node, 0);
+	if (IS_ERR(clk))
+		panic("Can't get timer clock");
+	clk_prepare_enable(clk);
+	rate = clk_get_rate(clk);
+
+	writel(~0, timer_base + TIMER_INTVAL_LO_REG(1));
+	writel(TIMER_CTL_ENABLE | TIMER_CTL_RELOAD,
+	       timer_base + TIMER_CTL_REG(1));
+
+	setup_sched_clock(sun5i_timer_sched_read, 32, rate);
+	clocksource_mmio_init(timer_base + TIMER_CNTVAL_LO_REG(1), node->name,
+			      rate, 340, 32, clocksource_mmio_readl_down);
+
+	ticks_per_jiffy = DIV_ROUND_UP(rate, HZ);
+
+	ret = setup_irq(irq, &sun5i_timer_irq);
+	if (ret)
+		pr_warn("failed to setup irq %d\n", irq);
+
+	/* Enable timer0 interrupt */
+	val = readl(timer_base + TIMER_IRQ_EN_REG);
+	writel(val | TIMER_IRQ_EN(0), timer_base + TIMER_IRQ_EN_REG);
+
+	sun5i_clockevent.cpumask = cpu_possible_mask;
+	sun5i_clockevent.irq = irq;
+
+	clockevents_config_and_register(&sun5i_clockevent, rate,
+					TIMER_SYNC_TICKS, 0xffffffff);
+}
+CLOCKSOURCE_OF_DECLARE(sun5i_a13, "allwinner,sun5i-a13-hstimer",
+		       sun5i_timer_init);
+CLOCKSOURCE_OF_DECLARE(sun7i_a20, "allwinner,sun7i-a20-hstimer",
+		       sun5i_timer_init);
-- 
cgit v1.2.3


From f2b5002889cd2ca25d1dfe522755ade701f49044 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Thu, 7 Nov 2013 12:01:48 +0100
Subject: ARM: sun5i: a10s: Add support for the High Speed Timers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Allwinner A10s has support for two high speed timers. Now that we
have a driver to support it, we can enable them in the device tree.

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Tested-by: Emilio López <emilio@elopez.com.ar>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 arch/arm/boot/dts/sun5i-a10s.dtsi | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi
index 52476742a104..e674c94c7206 100644
--- a/arch/arm/boot/dts/sun5i-a10s.dtsi
+++ b/arch/arm/boot/dts/sun5i-a10s.dtsi
@@ -332,5 +332,12 @@
 			clock-frequency = <100000>;
 			status = "disabled";
 		};
+
+		timer@01c60000 {
+			compatible = "allwinner,sun5i-a13-hstimer";
+			reg = <0x01c60000 0x1000>;
+			interrupts = <82>, <83>;
+			clocks = <&ahb_gates 28>;
+		};
 	};
 };
-- 
cgit v1.2.3


From 4411902a13e6b64873dc21abafeb57db335efcf1 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Thu, 7 Nov 2013 12:01:48 +0100
Subject: ARM: sun5i: a13: Add support for the High Speed Timers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Allwinner A13 has support for two high speed timers. Now that we
have a driver to support it, we can enable them in the device tree.

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Tested-by: Emilio López <emilio@elopez.com.ar>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 arch/arm/boot/dts/sun5i-a13.dtsi | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi
index ce8ef2a45be0..1ccd75d37f49 100644
--- a/arch/arm/boot/dts/sun5i-a13.dtsi
+++ b/arch/arm/boot/dts/sun5i-a13.dtsi
@@ -273,5 +273,12 @@
 			clock-frequency = <100000>;
 			status = "disabled";
 		};
+
+		timer@01c60000 {
+			compatible = "allwinner,sun5i-a13-hstimer";
+			reg = <0x01c60000 0x1000>;
+			interrupts = <82>, <83>;
+			clocks = <&ahb_gates 28>;
+		};
 	};
 };
-- 
cgit v1.2.3


From 31f8ad387e4306ec1fb2a01c5cd0d648b5e9bff5 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Thu, 7 Nov 2013 12:01:48 +0100
Subject: ARM: sun7i: a20: Add support for the High Speed Timers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Allwinner A20 has support for four high speed timers. Apart for the
number of timers (4 vs 2), it's basically the same logic than the high
speed timers found in the sun5i chips.

Now that we have a driver to support it, we can enable them in the
device tree.

[dlezcano] : Fixed conflict with 428abbb8 "Enable the I2C controllers"

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Tested-by: Emilio López <emilio@elopez.com.ar>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 arch/arm/boot/dts/sun7i-a20.dtsi | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi
index e46cfedde74c..ee6cec7b0c90 100644
--- a/arch/arm/boot/dts/sun7i-a20.dtsi
+++ b/arch/arm/boot/dts/sun7i-a20.dtsi
@@ -395,6 +395,16 @@
 			status = "disabled";
 		};
 
+		hstimer@01c60000 {
+			compatible = "allwinner,sun7i-a20-hstimer";
+			reg = <0x01c60000 0x1000>;
+			interrupts = <0 81 1>,
+				     <0 82 1>,
+				     <0 83 1>,
+				     <0 84 1>;
+			clocks = <&ahb_gates 28>;
+		};
+
 		gic: interrupt-controller@01c81000 {
 			compatible = "arm,cortex-a7-gic", "arm,cortex-a15-gic";
 			reg = <0x01c81000 0x1000>,
-- 
cgit v1.2.3


From d8af4ce490e92adfa123ae79899332bd829903e7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Thu, 12 Dec 2013 14:46:42 +0100
Subject: x86/traps: Clean up error exception handler definitions

So I was reading the exception handler generation code and got a real
headache looking at the unstructured mess that our DO_ERROR*()
generation code is today.

Make it more readable.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Link: http://lkml.kernel.org/n/tip-kuabysiykvUJpgus35lhnhvs@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/traps.c | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b857ed890b4c..57409f6b8c62 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -211,21 +211,17 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
 	exception_exit(prev_state);					\
 }
 
-DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV,
-		regs->ip)
-DO_ERROR(X86_TRAP_OF, SIGSEGV, "overflow", overflow)
-DO_ERROR(X86_TRAP_BR, SIGSEGV, "bounds", bounds)
-DO_ERROR_INFO(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN,
-		regs->ip)
-DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun",
-		coprocessor_segment_overrun)
-DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS)
-DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present)
+DO_ERROR_INFO(X86_TRAP_DE,     SIGFPE,  "divide error",			divide_error,		     FPE_INTDIV, regs->ip )
+DO_ERROR     (X86_TRAP_OF,     SIGSEGV, "overflow",			overflow					  )
+DO_ERROR     (X86_TRAP_BR,     SIGSEGV, "bounds",			bounds						  )
+DO_ERROR_INFO(X86_TRAP_UD,     SIGILL,  "invalid opcode",		invalid_op,		     ILL_ILLOPN, regs->ip )
+DO_ERROR     (X86_TRAP_OLD_MF, SIGFPE,  "coprocessor segment overrun",	coprocessor_segment_overrun			  )
+DO_ERROR     (X86_TRAP_TS,     SIGSEGV, "invalid TSS",			invalid_TSS					  )
+DO_ERROR     (X86_TRAP_NP,     SIGBUS,  "segment not present",		segment_not_present				  )
 #ifdef CONFIG_X86_32
-DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment)
+DO_ERROR     (X86_TRAP_SS,     SIGBUS,  "stack segment",		stack_segment					  )
 #endif
-DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check,
-		BUS_ADRALN, 0)
+DO_ERROR_INFO(X86_TRAP_AC,     SIGBUS,  "alignment check",		alignment_check,	     BUS_ADRALN, 0	  )
 
 #ifdef CONFIG_X86_64
 /* Runs on IST stack */
-- 
cgit v1.2.3


From 919fc6e34831d1c2b58bfb5ae261dc3facc9b269 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 11 Dec 2013 13:59:11 -0800
Subject: powerpc: Full barrier for smp_mb__after_unlock_lock()

The powerpc lock acquisition sequence is as follows:

	lwarx; cmpwi; bne; stwcx.; lwsync;

Lock release is as follows:

	lwsync; stw;

If CPU 0 does a store (say, x=1) then a lock release, and CPU 1
does a lock acquisition then a load (say, r1=y), then there is
no guarantee of a full memory barrier between the store to 'x'
and the load from 'y'. To see this, suppose that CPUs 0 and 1
are hardware threads in the same core that share a store buffer,
and that CPU 2 is in some other core, and that CPU 2 does the
following:

	y = 1; sync; r2 = x;

If 'x' and 'y' are both initially zero, then the lock
acquisition and release sequences above can result in r1 and r2
both being equal to zero, which could not happen if unlock+lock
was a full barrier.

This commit therefore makes powerpc's
smp_mb__after_unlock_lock() be a full barrier.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Reviewed-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: linuxppc-dev@lists.ozlabs.org
Cc: <linux-arch@vger.kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/1386799151-2219-8-git-send-email-paulmck@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/powerpc/include/asm/spinlock.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 5f54a744dcc5..f6e78d63fb6a 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -28,6 +28,8 @@
 #include <asm/synch.h>
 #include <asm/ppc-opcode.h>
 
+#define smp_mb__after_unlock_lock()	smp_mb()  /* Full ordering for lock. */
+
 #define arch_spin_is_locked(x)		((x)->slock != 0)
 
 #ifdef CONFIG_PPC64
-- 
cgit v1.2.3


From dea24190fbfb340dea5224e32161955514bef31b Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 3 Dec 2013 10:06:29 +0100
Subject: s390/smp: only send external call ipi if needed

If the per cpu ec_mask bit of the receiving cpu is already set there is
no need to send an ipi, since a different cpu has already sent an ipi
and the receiving cpu has not yet executed the external call ipi handler.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/smp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index dc4a53465060..86b291323c62 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -159,9 +159,9 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
 {
 	int order;
 
-	set_bit(ec_bit, &pcpu->ec_mask);
-	order = pcpu_running(pcpu) ?
-		SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL;
+	if (test_and_set_bit(ec_bit, &pcpu->ec_mask))
+		return;
+	order = pcpu_running(pcpu) ? SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL;
 	pcpu_sigp_retry(pcpu, order, 0);
 }
 
-- 
cgit v1.2.3


From 1c182a628075af7431edb8155601740867b5ae51 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Tue, 3 Dec 2013 11:09:10 +0100
Subject: s390/ptrace: simplify enable/disable single step

The user_enable_single_step() and user_disable_sindle_step() functions
are always called on the inferior, never for the currently active
process. Remove the unnecessary check for the current process and
the update_cr_regs() call from the enable/disable functions.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/ptrace.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index e65c91c591e8..c369a26d1d56 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -107,15 +107,11 @@ void update_cr_regs(struct task_struct *task)
 void user_enable_single_step(struct task_struct *task)
 {
 	set_tsk_thread_flag(task, TIF_SINGLE_STEP);
-	if (task == current)
-		update_cr_regs(task);
 }
 
 void user_disable_single_step(struct task_struct *task)
 {
 	clear_tsk_thread_flag(task, TIF_SINGLE_STEP);
-	if (task == current)
-		update_cr_regs(task);
 }
 
 /*
-- 
cgit v1.2.3


From c63badebfebacdba827ab1cc1d420fc81bd8d818 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Tue, 3 Dec 2013 14:57:18 +0100
Subject: s390: optimize control register update

It is less expensive to update control registers 0 and 2 with two
individual stctg/lctlg instructions as with a single one that spans
control register 0, 1 and 2.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/ptrace.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index c369a26d1d56..f6be6087a0e9 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -56,25 +56,26 @@ void update_cr_regs(struct task_struct *task)
 #ifdef CONFIG_64BIT
 	/* Take care of the enable/disable of transactional execution. */
 	if (MACHINE_HAS_TE) {
-		unsigned long cr[3], cr_new[3];
+		unsigned long cr, cr_new;
 
-		__ctl_store(cr, 0, 2);
-		cr_new[1] = cr[1];
+		__ctl_store(cr, 0, 0);
 		/* Set or clear transaction execution TXC bit 8. */
+		cr_new = cr | (1UL << 55);
 		if (task->thread.per_flags & PER_FLAG_NO_TE)
-			cr_new[0] = cr[0] & ~(1UL << 55);
-		else
-			cr_new[0] = cr[0] | (1UL << 55);
+			cr_new &= ~(1UL << 55);
+		if (cr_new != cr)
+			__ctl_load(cr, 0, 0);
 		/* Set or clear transaction execution TDC bits 62 and 63. */
-		cr_new[2] = cr[2] & ~3UL;
+		__ctl_store(cr, 2, 2);
+		cr_new = cr & ~3UL;
 		if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
 			if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND)
-				cr_new[2] |= 1UL;
+				cr_new |= 1UL;
 			else
-				cr_new[2] |= 2UL;
+				cr_new |= 2UL;
 		}
-		if (memcmp(&cr_new, &cr, sizeof(cr)))
-			__ctl_load(cr_new, 0, 2);
+		if (cr_new != cr)
+			__ctl_load(cr_new, 2, 2);
 	}
 #endif
 	/* Copy user specified PER registers */
-- 
cgit v1.2.3


From 96619fc1b3d06703113ab4c5cd8c15f35a42dc99 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 5 Dec 2013 12:42:09 +0100
Subject: s390/smp: reduce memory consumption of pcpu_devices array

Remove the embedded struct cpu from struct pcpu and replace it with a
pointer instead. The struct cpu now gets allocated when a new cpu gets
detected.

The size of the pcpu_devices array (NR_CPUS * sizeof(struct pcpu)) gets
reduced by nearly 120KB.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/smp.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 86b291323c62..8ceefc949e8f 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -59,7 +59,7 @@ enum {
 };
 
 struct pcpu {
-	struct cpu cpu;
+	struct cpu *cpu;
 	struct _lowcore *lowcore;	/* lowcore page(s) for the cpu */
 	unsigned long async_stack;	/* async stack for the cpu */
 	unsigned long panic_stack;	/* panic stack for the cpu */
@@ -958,7 +958,7 @@ static int smp_cpu_notify(struct notifier_block *self, unsigned long action,
 			  void *hcpu)
 {
 	unsigned int cpu = (unsigned int)(long)hcpu;
-	struct cpu *c = &pcpu_devices[cpu].cpu;
+	struct cpu *c = pcpu_devices[cpu].cpu;
 	struct device *s = &c->dev;
 	int err = 0;
 
@@ -975,10 +975,15 @@ static int smp_cpu_notify(struct notifier_block *self, unsigned long action,
 
 static int smp_add_present_cpu(int cpu)
 {
-	struct cpu *c = &pcpu_devices[cpu].cpu;
-	struct device *s = &c->dev;
+	struct device *s;
+	struct cpu *c;
 	int rc;
 
+	c = kzalloc(sizeof(*c), GFP_KERNEL);
+	if (!c)
+		return -ENOMEM;
+	pcpu_devices[cpu].cpu = c;
+	s = &c->dev;
 	c->hotpluggable = 1;
 	rc = register_cpu(c, cpu);
 	if (rc)
-- 
cgit v1.2.3


From 41932bc1c86e527f866acfcd26506da3bd20509b Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Tue, 10 Dec 2013 16:18:07 +0100
Subject: s390/compat: correct check for EFAULT in rt-signal frame creation

The return code of the __put_user call to store the rt_sigreturn
system call to the user stack if not properly checked, the err
variable is only checked before to the __put_user. Use an if
statement instead.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/compat_signal.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 95e7ba0fbb7e..8b84bc373e94 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -412,8 +412,9 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info,
 		regs->gprs[14] = (__u64 __force) ka->sa.sa_restorer | PSW32_ADDR_AMODE;
 	} else {
 		regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE;
-		err |= __put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn,
-				  (u16 __force __user *)(frame->retcode));
+		if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn,
+			       (u16 __force __user *)(frame->retcode)))
+			goto give_sigsegv;
 	}
 
 	/* Set up backchain. */
-- 
cgit v1.2.3


From 52733e0152dad719ed6374b56fd1c33e784e44b3 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Thu, 5 Dec 2013 19:28:39 +0100
Subject: s390/sclp_early: Add function to detect sclp console capabilities

Add SCLP console detect functions to encapsulate detection of SCLP console
capabilities, for example, VT220 support.  Reuse the sclp_send/receive masks
that were stored by the most recent sclp_set_event_mask() call to prevent
unnecessary SCLP calls.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Reviewed-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/sclp.h   |  4 ++--
 drivers/s390/char/sclp_early.c | 46 +++++++++++++++++++++++++++---------------
 2 files changed, 32 insertions(+), 18 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index 2f390956c7c1..220e171413f8 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -52,8 +52,8 @@ int sclp_chp_configure(struct chp_id chpid);
 int sclp_chp_deconfigure(struct chp_id chpid);
 int sclp_chp_read_info(struct sclp_chp_info *info);
 void sclp_get_ipl_info(struct sclp_ipl_info *info);
-bool sclp_has_linemode(void);
-bool sclp_has_vt220(void);
+bool __init sclp_has_linemode(void);
+bool __init sclp_has_vt220(void);
 int sclp_pci_configure(u32 fid);
 int sclp_pci_deconfigure(u32 fid);
 int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode);
diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c
index 1af3555c096d..82f2c389b4d1 100644
--- a/drivers/s390/char/sclp_early.c
+++ b/drivers/s390/char/sclp_early.c
@@ -36,6 +36,8 @@ struct read_info_sccb {
 } __packed __aligned(PAGE_SIZE);
 
 static char sccb_early[PAGE_SIZE] __aligned(PAGE_SIZE) __initdata;
+static unsigned int sclp_con_has_vt220 __initdata;
+static unsigned int sclp_con_has_linemode __initdata;
 static unsigned long sclp_hsa_size;
 static struct sclp_ipl_info sclp_ipl_info;
 
@@ -109,26 +111,12 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
 
 bool __init sclp_has_linemode(void)
 {
-	struct init_sccb *sccb = (void *) &sccb_early;
-
-	if (sccb->header.response_code != 0x20)
-		return 0;
-	if (!(sccb->sclp_send_mask & (EVTYP_OPCMD_MASK | EVTYP_PMSGCMD_MASK)))
-		return 0;
-	if (!(sccb->sclp_receive_mask & (EVTYP_MSG_MASK | EVTYP_PMSGCMD_MASK)))
-		return 0;
-	return 1;
+	return !!sclp_con_has_linemode;
 }
 
 bool __init sclp_has_vt220(void)
 {
-	struct init_sccb *sccb = (void *) &sccb_early;
-
-	if (sccb->header.response_code != 0x20)
-		return 0;
-	if (sccb->sclp_send_mask & EVTYP_VT220MSG_MASK)
-		return 1;
-	return 0;
+	return !!sclp_con_has_vt220;
 }
 
 unsigned long long sclp_get_rnmax(void)
@@ -240,11 +228,37 @@ out:
 	sclp_hsa_size = size;
 }
 
+static unsigned int __init sclp_con_check_linemode(struct init_sccb *sccb)
+{
+	if (!(sccb->sclp_send_mask & (EVTYP_OPCMD_MASK | EVTYP_PMSGCMD_MASK)))
+		return 0;
+	if (!(sccb->sclp_receive_mask & (EVTYP_MSG_MASK | EVTYP_PMSGCMD_MASK)))
+		return 0;
+	return 1;
+}
+
+static void __init sclp_console_detect(struct init_sccb *sccb)
+{
+	if (sccb->header.response_code != 0x20)
+		return;
+
+	if (sccb->sclp_send_mask & EVTYP_VT220MSG_MASK)
+		sclp_con_has_vt220 = 1;
+
+	if (sclp_con_check_linemode(sccb))
+		sclp_con_has_linemode = 1;
+}
+
 void __init sclp_early_detect(void)
 {
 	void *sccb = &sccb_early;
 
 	sclp_facilities_detect(sccb);
 	sclp_hsa_size_detect(sccb);
+
+	/* Turn off SCLP event notifications.  Also save remote masks in the
+	 * sccb.  These are sufficient to detect sclp console capabilities.
+	 */
 	sclp_set_event_mask(sccb, 0, 0);
+	sclp_console_detect(sccb);
 }
-- 
cgit v1.2.3


From cf48ad83278aad39d4a0158cd085b6038b2941e6 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Wed, 11 Dec 2013 12:15:52 +0100
Subject: s390/oprofile: move hwsampler interfaces to cpu_mf.h

Extract and move the oprofile hwsampler data structures and interfaces to
the cpu_mf.h header file which contains common interface definitions
for the various CPU-measurement facilities.   This change is necessary for
a new perf PMU.

Few interface names have been revised to fit to the latest CPU-measurement
facilities documentation.  Also declare the data structures as __packed and
correct checkpatch findings.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/cpu_mf.h | 138 +++++++++++++++++++++++++++++++++++++++++
 arch/s390/oprofile/hwsampler.c |  67 +-------------------
 arch/s390/oprofile/hwsampler.h |  52 +---------------
 3 files changed, 142 insertions(+), 115 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index c879fad404c8..f6dddeaad965 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -56,6 +56,78 @@ struct cpumf_ctr_info {
 	u32   reserved2[12];
 } __packed;
 
+/* QUERY SAMPLING INFORMATION block */
+struct hws_qsi_info_block {	    /* Bit(s) */
+	unsigned int b0_13:14;	    /* 0-13: zeros			 */
+	unsigned int as:1;	    /* 14: sampling authorisation control*/
+	unsigned int b15_21:7;	    /* 15-21: zeros			 */
+	unsigned int es:1;	    /* 22: sampling enable control	 */
+	unsigned int b23_29:7;	    /* 23-29: zeros			 */
+	unsigned int cs:1;	    /* 30: sampling activation control	 */
+	unsigned int:1; 	    /* 31: reserved			 */
+	unsigned int bsdes:16;	    /* 4-5: size of basic sampling entry      */
+	unsigned int dsdes:16;	    /* 6-7: size of diagnostic sampling entry */
+	unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */
+	unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/
+	unsigned long tear;	    /* 24-31: TEAR contents		 */
+	unsigned long dear;	    /* 32-39: DEAR contents		 */
+	unsigned int rsvrd0;	    /* 40-43: reserved			 */
+	unsigned int cpu_speed;     /* 44-47: CPU speed 		 */
+	unsigned long long rsvrd1;  /* 48-55: reserved			 */
+	unsigned long long rsvrd2;  /* 56-63: reserved			 */
+} __packed;
+
+/* SET SAMPLING CONTROLS request block */
+struct hws_lsctl_request_block {
+	unsigned int s:1;	    /* 0: maximum buffer indicator	 */
+	unsigned int h:1;	    /* 1: part. level reserved for VM use*/
+	unsigned long long b2_53:52;/* 2-53: zeros			 */
+	unsigned int es:1;	    /* 54: sampling enable control	 */
+	unsigned int b55_61:7;	    /* 55-61: - zeros			 */
+	unsigned int cs:1;	    /* 62: sampling activation control	 */
+	unsigned int b63:1;	    /* 63: zero 			 */
+	unsigned long interval;     /* 8-15: sampling interval		 */
+	unsigned long tear;	    /* 16-23: TEAR contents		 */
+	unsigned long dear;	    /* 24-31: DEAR contents		 */
+	/* 32-63:							 */
+	unsigned long rsvrd1;	    /* reserved 			 */
+	unsigned long rsvrd2;	    /* reserved 			 */
+	unsigned long rsvrd3;	    /* reserved 			 */
+	unsigned long rsvrd4;	    /* reserved 			 */
+} __packed;
+
+
+struct hws_data_entry {
+	unsigned int def:16;	    /* 0-15  Data Entry Format		 */
+	unsigned int R:4;	    /* 16-19 reserved			 */
+	unsigned int U:4;	    /* 20-23 Number of unique instruct.  */
+	unsigned int z:2;	    /* zeros				 */
+	unsigned int T:1;	    /* 26 PSW DAT mode			 */
+	unsigned int W:1;	    /* 27 PSW wait state		 */
+	unsigned int P:1;	    /* 28 PSW Problem state		 */
+	unsigned int AS:2;	    /* 29-30 PSW address-space control	 */
+	unsigned int I:1;	    /* 31 entry valid or invalid	 */
+	unsigned int:16;
+	unsigned int prim_asn:16;   /* primary ASN			 */
+	unsigned long long ia;	    /* Instruction Address		 */
+	unsigned long long gpp;     /* Guest Program Parameter		 */
+	unsigned long long hpp;     /* Host Program Parameter		 */
+} __packed;
+
+struct hws_trailer_entry {
+	unsigned int f:1;	    /* 0 - Block Full Indicator 	 */
+	unsigned int a:1;	    /* 1 - Alert request control	 */
+	unsigned int t:1;	    /* 2 - Timestamp format		 */
+	unsigned long long:61;	    /* 3 - 63: Reserved 		 */
+	unsigned long long overflow;	 /* 64 - sample Overflow count	      */
+	unsigned long long timestamp;	 /* 16 - time-stamp		      */
+	unsigned long long timestamp1;	 /*				      */
+	unsigned long long reserved1;	 /* 32 -Reserved		      */
+	unsigned long long reserved2;	 /*				      */
+	unsigned long long progusage1;	 /* 48 - reserved for programming use */
+	unsigned long long progusage2;	 /*				      */
+} __packed;
+
 /* Query counter information */
 static inline int qctri(struct cpumf_ctr_info *info)
 {
@@ -99,4 +171,70 @@ static inline int ecctr(u64 ctr, u64 *val)
 	return cc;
 }
 
+/* Query sampling information */
+static inline int qsi(struct hws_qsi_info_block *info)
+{
+	int cc;
+	cc = 1;
+
+	asm volatile(
+		"0:	.insn	s,0xb2860000,0(%1)\n"
+		"1:	lhi	%0,0\n"
+		"2:\n"
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+		: "=d" (cc), "+a" (info)
+		: "m" (*info)
+		: "cc", "memory");
+
+	return cc ? -EINVAL : 0;
+}
+
+/* Load sampling controls */
+static inline int lsctl(struct hws_lsctl_request_block *req)
+{
+	int cc;
+
+	cc = 1;
+	asm volatile(
+		"0:	.insn	s,0xb2870000,0(%1)\n"
+		"1:	ipm	%0\n"
+		"	srl	%0,28\n"
+		"2:\n"
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+		: "+d" (cc), "+a" (req)
+		: "m" (*req)
+		: "cc", "memory");
+
+	return cc ? -EINVAL : 0;
+}
+
+/* Sampling control helper functions */
+
+#define SDB_TE_ALERT_REQ_MASK	0x4000000000000000UL
+#define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL
+
+/* Return pointer to trailer entry of an sample data block */
+static inline unsigned long *trailer_entry_ptr(unsigned long v)
+{
+	void *ret;
+
+	ret = (void *) v;
+	ret += PAGE_SIZE;
+	ret -= sizeof(struct hws_trailer_entry);
+
+	return (unsigned long *) ret;
+}
+
+/* Return if the entry in the sample data block table (sdbt)
+ * is a link to the next sdbt */
+static inline int is_link_entry(unsigned long *s)
+{
+	return *s & 0x1ul ? 1 : 0;
+}
+
+/* Return pointer to the linked sdbt */
+static inline unsigned long *get_next_sdbt(unsigned long *s)
+{
+	return (unsigned long *) (*s & ~0x1ul);
+}
 #endif /* _ASM_S390_CPU_MF_H */
diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c
index 231cecafc2f1..bbca76ad6e1b 100644
--- a/arch/s390/oprofile/hwsampler.c
+++ b/arch/s390/oprofile/hwsampler.c
@@ -26,9 +26,6 @@
 #define MAX_NUM_SDB 511
 #define MIN_NUM_SDB 1
 
-#define ALERT_REQ_MASK   0x4000000000000000ul
-#define BUFFER_FULL_MASK 0x8000000000000000ul
-
 DECLARE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer);
 
 struct hws_execute_parms {
@@ -65,43 +62,6 @@ static unsigned long interval;
 static unsigned long min_sampler_rate;
 static unsigned long max_sampler_rate;
 
-static int ssctl(void *buffer)
-{
-	int cc;
-
-	/* set in order to detect a program check */
-	cc = 1;
-
-	asm volatile(
-		"0: .insn s,0xB2870000,0(%1)\n"
-		"1: ipm %0\n"
-		"   srl %0,28\n"
-		"2:\n"
-		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
-		: "+d" (cc), "+a" (buffer)
-		: "m" (*((struct hws_ssctl_request_block *)buffer))
-		: "cc", "memory");
-
-	return cc ? -EINVAL : 0 ;
-}
-
-static int qsi(void *buffer)
-{
-	int cc;
-	cc = 1;
-
-	asm volatile(
-		"0: .insn s,0xB2860000,0(%1)\n"
-		"1: lhi %0,0\n"
-		"2:\n"
-		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
-		: "=d" (cc), "+a" (buffer)
-		: "m" (*((struct hws_qsi_info_block *)buffer))
-		: "cc", "memory");
-
-	return cc ? -EINVAL : 0;
-}
-
 static void execute_qsi(void *parms)
 {
 	struct hws_execute_parms *ep = parms;
@@ -113,7 +73,7 @@ static void execute_ssctl(void *parms)
 {
 	struct hws_execute_parms *ep = parms;
 
-	ep->rc = ssctl(ep->buffer);
+	ep->rc = lsctl(ep->buffer);
 }
 
 static int smp_ctl_ssctl_stop(int cpu)
@@ -214,17 +174,6 @@ static int smp_ctl_qsi(int cpu)
 	return ep.rc;
 }
 
-static inline unsigned long *trailer_entry_ptr(unsigned long v)
-{
-	void *ret;
-
-	ret = (void *)v;
-	ret += PAGE_SIZE;
-	ret -= sizeof(struct hws_trailer_entry);
-
-	return (unsigned long *) ret;
-}
-
 static void hws_ext_handler(struct ext_code ext_code,
 			    unsigned int param32, unsigned long param64)
 {
@@ -256,16 +205,6 @@ static void init_all_cpu_buffers(void)
 	}
 }
 
-static int is_link_entry(unsigned long *s)
-{
-	return *s & 0x1ul ? 1 : 0;
-}
-
-static unsigned long *get_next_sdbt(unsigned long *s)
-{
-	return (unsigned long *) (*s & ~0x1ul);
-}
-
 static int prepare_cpu_buffers(void)
 {
 	int cpu;
@@ -353,7 +292,7 @@ static int allocate_sdbt(int cpu)
 			}
 			*sdbt = sdb;
 			trailer = trailer_entry_ptr(*sdbt);
-			*trailer = ALERT_REQ_MASK;
+			*trailer = SDB_TE_ALERT_REQ_MASK;
 			sdbt++;
 			mutex_unlock(&hws_sem_oom);
 		}
@@ -829,7 +768,7 @@ static void worker_on_interrupt(unsigned int cpu)
 
 		trailer = trailer_entry_ptr(*sdbt);
 		/* leave loop if no more work to do */
-		if (!(*trailer & BUFFER_FULL_MASK)) {
+		if (!(*trailer & SDB_TE_BUFFER_FULL_MASK)) {
 			done = 1;
 			if (!hws_flush_all)
 				continue;
diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h
index 0022e1ebfbde..a483d06f2fa7 100644
--- a/arch/s390/oprofile/hwsampler.h
+++ b/arch/s390/oprofile/hwsampler.h
@@ -9,27 +9,7 @@
 #define HWSAMPLER_H_
 
 #include <linux/workqueue.h>
-
-struct hws_qsi_info_block          /* QUERY SAMPLING information block  */
-{ /* Bit(s) */
-	unsigned int b0_13:14;      /* 0-13: zeros                       */
-	unsigned int as:1;          /* 14: sampling authorisation control*/
-	unsigned int b15_21:7;      /* 15-21: zeros                      */
-	unsigned int es:1;          /* 22: sampling enable control       */
-	unsigned int b23_29:7;      /* 23-29: zeros                      */
-	unsigned int cs:1;          /* 30: sampling activation control   */
-	unsigned int:1;             /* 31: reserved                      */
-	unsigned int bsdes:16;      /* 4-5: size of sampling entry       */
-	unsigned int:16;            /* 6-7: reserved                     */
-	unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */
-	unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/
-	unsigned long tear;         /* 24-31: TEAR contents              */
-	unsigned long dear;         /* 32-39: DEAR contents              */
-	unsigned int rsvrd0;        /* 40-43: reserved                   */
-	unsigned int cpu_speed;     /* 44-47: CPU speed                  */
-	unsigned long long rsvrd1;  /* 48-55: reserved                   */
-	unsigned long long rsvrd2;  /* 56-63: reserved                   */
-};
+#include <asm/cpu_mf.h>
 
 struct hws_ssctl_request_block     /* SET SAMPLING CONTROLS req block   */
 { /* bytes 0 - 7  Bit(s) */
@@ -68,36 +48,6 @@ struct hws_cpu_buffer {
 	unsigned int stop_mode:1;
 };
 
-struct hws_data_entry {
-	unsigned int def:16;        /* 0-15  Data Entry Format           */
-	unsigned int R:4;           /* 16-19 reserved                    */
-	unsigned int U:4;           /* 20-23 Number of unique instruct.  */
-	unsigned int z:2;           /* zeros                             */
-	unsigned int T:1;           /* 26 PSW DAT mode                   */
-	unsigned int W:1;           /* 27 PSW wait state                 */
-	unsigned int P:1;           /* 28 PSW Problem state              */
-	unsigned int AS:2;          /* 29-30 PSW address-space control   */
-	unsigned int I:1;           /* 31 entry valid or invalid         */
-	unsigned int:16;
-	unsigned int prim_asn:16;   /* primary ASN                       */
-	unsigned long long ia;      /* Instruction Address               */
-	unsigned long long gpp;     /* Guest Program Parameter		 */
-	unsigned long long hpp;     /* Host Program Parameter		 */
-};
-
-struct hws_trailer_entry {
-	unsigned int f:1;           /* 0 - Block Full Indicator          */
-	unsigned int a:1;           /* 1 - Alert request control         */
-	unsigned long:62;           /* 2 - 63: Reserved                  */
-	unsigned long overflow;     /* 64 - sample Overflow count        */
-	unsigned long timestamp;    /* 16 - time-stamp                   */
-	unsigned long timestamp1;   /*                                   */
-	unsigned long reserved1;    /* 32 -Reserved                      */
-	unsigned long reserved2;    /*                                   */
-	unsigned long progusage1;   /* 48 - reserved for programming use */
-	unsigned long progusage2;   /*                                   */
-};
-
 int hwsampler_setup(void);
 int hwsampler_shutdown(void);
 int hwsampler_allocate(unsigned long sdbt, unsigned long sdb);
-- 
cgit v1.2.3


From c716832513f30430179b60ac5ffd203c53f7eb40 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Wed, 11 Dec 2013 12:44:40 +0100
Subject: s390/cpum_cf: Export event names in sysfs

Provide PMU event attributes for supported counters and export their symbolic
names to the sysfs "events" directory.

See the /sys/devices/cpum_cf/events/ directory for a list of available counters.
Note that you might require counter set authorizations for the LPAR to use them.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/perf_event.h     |  23 ++-
 arch/s390/kernel/Makefile              |   3 +-
 arch/s390/kernel/perf_cpum_cf.c        |   1 +
 arch/s390/kernel/perf_cpum_cf_events.c | 322 +++++++++++++++++++++++++++++++++
 arch/s390/kernel/perf_event.c          |  12 ++
 5 files changed, 358 insertions(+), 3 deletions(-)
 create mode 100644 arch/s390/kernel/perf_cpum_cf_events.c

(limited to 'arch')

diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index 1141fb3e7b21..34185020ae0a 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -1,11 +1,18 @@
 /*
  * Performance event support - s390 specific definitions.
  *
- * Copyright IBM Corp. 2009, 2012
+ * Copyright IBM Corp. 2009, 2013
  * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
  *	      Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
  */
 
+#ifndef _ASM_S390_PERF_EVENT_H
+#define _ASM_S390_PERF_EVENT_H
+
+#ifdef CONFIG_64BIT
+
+#include <linux/perf_event.h>
+#include <linux/device.h>
 #include <asm/cpu_mf.h>
 
 /* CPU-measurement counter facility */
@@ -15,7 +22,18 @@
 #define PMU_F_RESERVED			0x1000
 #define PMU_F_ENABLED			0x2000
 
-#ifdef CONFIG_64BIT
+/* Perf defintions for PMU event attributes in sysfs */
+extern __init const struct attribute_group **cpumf_cf_event_group(void);
+extern ssize_t cpumf_events_sysfs_show(struct device *dev,
+				       struct device_attribute *attr,
+				       char *page);
+#define EVENT_VAR(_cat, _name)		event_attr_##_cat##_##_name
+#define EVENT_PTR(_cat, _name)		(&EVENT_VAR(_cat, _name).attr.attr)
+
+#define CPUMF_EVENT_ATTR(cat, name, id)			\
+	PMU_EVENT_ATTR(name, EVENT_VAR(cat, name), id, cpumf_events_sysfs_show)
+#define CPUMF_EVENT_PTR(cat, name)	EVENT_PTR(cat, name)
+
 
 /* Perf callbacks */
 struct pt_regs;
@@ -24,3 +42,4 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
 #define perf_misc_flags(regs) perf_misc_flags(regs)
 
 #endif /* CONFIG_64BIT */
+#endif /* _ASM_S390_PERF_EVENT_H */
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 2403303cfed7..9f1e2adbd77e 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -60,7 +60,8 @@ obj-$(CONFIG_FTRACE_SYSCALLS)  += ftrace.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 
 ifdef CONFIG_64BIT
-obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o perf_cpum_cf.o
+obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o perf_cpum_cf.o \
+						perf_cpum_cf_events.o
 obj-y				+= runtime_instr.o cache.o
 endif
 
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 1105502bf6e9..f51214c04858 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -680,6 +680,7 @@ static int __init cpumf_pmu_init(void)
 		goto out;
 	}
 
+	cpumf_pmu.attr_groups = cpumf_cf_event_group();
 	rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW);
 	if (rc) {
 		pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
new file mode 100644
index 000000000000..4554a4bae39e
--- /dev/null
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -0,0 +1,322 @@
+/*
+ * Perf PMU sysfs events attributes for available CPU-measurement counters
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+
+
+/* BEGIN: CPUM_CF COUNTER DEFINITIONS =================================== */
+
+CPUMF_EVENT_ATTR(cf, CPU_CYCLES, 0x0000);
+CPUMF_EVENT_ATTR(cf, INSTRUCTIONS, 0x0001);
+CPUMF_EVENT_ATTR(cf, L1I_DIR_WRITES, 0x0002);
+CPUMF_EVENT_ATTR(cf, L1I_PENALTY_CYCLES, 0x0003);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_CPU_CYCLES, 0x0020);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_INSTRUCTIONS, 0x0021);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1I_DIR_WRITES, 0x0022);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1I_PENALTY_CYCLES, 0x0023);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1D_DIR_WRITES, 0x0024);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1D_PENALTY_CYCLES, 0x0025);
+CPUMF_EVENT_ATTR(cf, L1D_DIR_WRITES, 0x0004);
+CPUMF_EVENT_ATTR(cf, L1D_PENALTY_CYCLES, 0x0005);
+CPUMF_EVENT_ATTR(cf, PRNG_FUNCTIONS, 0x0040);
+CPUMF_EVENT_ATTR(cf, PRNG_CYCLES, 0x0041);
+CPUMF_EVENT_ATTR(cf, PRNG_BLOCKED_FUNCTIONS, 0x0042);
+CPUMF_EVENT_ATTR(cf, PRNG_BLOCKED_CYCLES, 0x0043);
+CPUMF_EVENT_ATTR(cf, SHA_FUNCTIONS, 0x0044);
+CPUMF_EVENT_ATTR(cf, SHA_CYCLES, 0x0045);
+CPUMF_EVENT_ATTR(cf, SHA_BLOCKED_FUNCTIONS, 0x0046);
+CPUMF_EVENT_ATTR(cf, SHA_BLOCKED_CYCLES, 0x0047);
+CPUMF_EVENT_ATTR(cf, DEA_FUNCTIONS, 0x0048);
+CPUMF_EVENT_ATTR(cf, DEA_CYCLES, 0x0049);
+CPUMF_EVENT_ATTR(cf, DEA_BLOCKED_FUNCTIONS, 0x004a);
+CPUMF_EVENT_ATTR(cf, DEA_BLOCKED_CYCLES, 0x004b);
+CPUMF_EVENT_ATTR(cf, AES_FUNCTIONS, 0x004c);
+CPUMF_EVENT_ATTR(cf, AES_CYCLES, 0x004d);
+CPUMF_EVENT_ATTR(cf, AES_BLOCKED_FUNCTIONS, 0x004e);
+CPUMF_EVENT_ATTR(cf, AES_BLOCKED_CYCLES, 0x004f);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L2_SOURCED_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L2_SOURCED_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L3_LOCAL_WRITES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L3_LOCAL_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L3_REMOTE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L3_REMOTE_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_z10, L1D_LMEM_SOURCED_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z10, L1I_LMEM_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z10, L1D_RO_EXCL_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z10, L1I_CACHELINE_INVALIDATES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z10, ITLB1_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z10, DTLB1_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_PTE_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_z10, ITLB1_MISSES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z10, DTLB1_MISSES, 0x0092);
+CPUMF_EVENT_ATTR(cf_z10, L2C_STORES_SENT, 0x0093);
+CPUMF_EVENT_ATTR(cf_z196, L1D_L2_SOURCED_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z196, L1I_L2_SOURCED_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z196, ITLB1_MISSES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z196, L2C_STORES_SENT, 0x0085);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z196, L1D_RO_EXCL_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_HPAGE_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_z196, L1D_LMEM_SOURCED_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_z196, L1I_LMEM_SOURCED_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_z196, ITLB1_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_PTE_WRITES, 0x0092);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0096);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0098);
+CPUMF_EVENT_ATTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009b);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_MISSES, 0x0080);
+CPUMF_EVENT_ATTR(cf_zec12, ITLB1_MISSES, 0x0081);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_L2I_SOURCED_WRITES, 0x0082);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_L2I_SOURCED_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_L2D_SOURCED_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_LMEM_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_LMEM_SOURCED_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_RO_EXCL_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_HPAGE_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_zec12, ITLB1_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_PTE_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_WRITES, 0x008f);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0092);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TEND, 0x0095);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0096);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV, 0x0097);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV, 0x0098);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009a);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x009b);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES, 0x009c);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x009d);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TEND, 0x009e);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x009f);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV, 0x00a0);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV, 0x00a1);
+CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TABORT, 0x00b1);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_NO_SPECIAL, 0x00b2);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_SPECIAL, 0x00b3);
+
+static struct attribute *cpumcf_pmu_event_attr[] = {
+	CPUMF_EVENT_PTR(cf, CPU_CYCLES),
+	CPUMF_EVENT_PTR(cf, INSTRUCTIONS),
+	CPUMF_EVENT_PTR(cf, L1I_DIR_WRITES),
+	CPUMF_EVENT_PTR(cf, L1I_PENALTY_CYCLES),
+	CPUMF_EVENT_PTR(cf, PROBLEM_STATE_CPU_CYCLES),
+	CPUMF_EVENT_PTR(cf, PROBLEM_STATE_INSTRUCTIONS),
+	CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1I_DIR_WRITES),
+	CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1I_PENALTY_CYCLES),
+	CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1D_DIR_WRITES),
+	CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1D_PENALTY_CYCLES),
+	CPUMF_EVENT_PTR(cf, L1D_DIR_WRITES),
+	CPUMF_EVENT_PTR(cf, L1D_PENALTY_CYCLES),
+	CPUMF_EVENT_PTR(cf, PRNG_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, PRNG_CYCLES),
+	CPUMF_EVENT_PTR(cf, PRNG_BLOCKED_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, PRNG_BLOCKED_CYCLES),
+	CPUMF_EVENT_PTR(cf, SHA_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, SHA_CYCLES),
+	CPUMF_EVENT_PTR(cf, SHA_BLOCKED_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, SHA_BLOCKED_CYCLES),
+	CPUMF_EVENT_PTR(cf, DEA_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, DEA_CYCLES),
+	CPUMF_EVENT_PTR(cf, DEA_BLOCKED_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, DEA_BLOCKED_CYCLES),
+	CPUMF_EVENT_PTR(cf, AES_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, AES_CYCLES),
+	CPUMF_EVENT_PTR(cf, AES_BLOCKED_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, AES_BLOCKED_CYCLES),
+	NULL,
+};
+
+static struct attribute *cpumcf_z10_pmu_event_attr[] __initdata = {
+	CPUMF_EVENT_PTR(cf_z10, L1I_L2_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1D_L2_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1I_L3_LOCAL_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1D_L3_LOCAL_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1I_L3_REMOTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1D_L3_REMOTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1D_LMEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1I_LMEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1D_RO_EXCL_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1I_CACHELINE_INVALIDATES),
+	CPUMF_EVENT_PTR(cf_z10, ITLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, DTLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, TLB2_PTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, ITLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_z10, DTLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_z10, L2C_STORES_SENT),
+	NULL,
+};
+
+static struct attribute *cpumcf_z196_pmu_event_attr[] __initdata = {
+	CPUMF_EVENT_PTR(cf_z196, L1D_L2_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_L2_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, DTLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_z196, ITLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_z196, L2C_STORES_SENT),
+	CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1D_RO_EXCL_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, DTLB1_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1D_LMEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_LMEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, DTLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, ITLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, TLB2_PTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES),
+	NULL,
+};
+
+static struct attribute *cpumcf_zec12_pmu_event_attr[] __initdata = {
+	CPUMF_EVENT_PTR(cf_zec12, DTLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_zec12, ITLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_L2I_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_L2I_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_L2D_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, DTLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_LMEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_LMEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_RO_EXCL_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, DTLB1_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, ITLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, TLB2_PTE_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, TX_NC_TEND),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, TX_C_TEND),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_zec12, TX_NC_TABORT),
+	CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_NO_SPECIAL),
+	CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_SPECIAL),
+	NULL,
+};
+
+/* END: CPUM_CF COUNTER DEFINITIONS ===================================== */
+
+static struct attribute_group cpumsf_pmu_events_group = {
+	.name = "events",
+	.attrs = cpumcf_pmu_event_attr,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *cpumsf_pmu_format_attr[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute_group cpumsf_pmu_format_group = {
+	.name = "format",
+	.attrs = cpumsf_pmu_format_attr,
+};
+
+static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
+	&cpumsf_pmu_events_group,
+	&cpumsf_pmu_format_group,
+	NULL,
+};
+
+
+static __init struct attribute **merge_attr(struct attribute **a,
+					    struct attribute **b)
+{
+	struct attribute **new;
+	int j, i;
+
+	for (j = 0; a[j]; j++)
+		;
+	for (i = 0; b[i]; i++)
+		j++;
+	j++;
+
+	new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL);
+	if (!new)
+		return NULL;
+	j = 0;
+	for (i = 0; a[i]; i++)
+		new[j++] = a[i];
+	for (i = 0; b[i]; i++)
+		new[j++] = b[i];
+	new[j] = NULL;
+
+	return new;
+}
+
+__init const struct attribute_group **cpumf_cf_event_group(void)
+{
+	struct attribute **combined, **model;
+	struct cpuid cpu_id;
+
+	get_cpu_id(&cpu_id);
+	switch (cpu_id.machine) {
+	case 0x2097:
+	case 0x2098:
+		model = cpumcf_z10_pmu_event_attr;
+		break;
+	case 0x2817:
+	case 0x2818:
+		model = cpumcf_z196_pmu_event_attr;
+		break;
+	case 0x2827:
+	case 0x2828:
+		model = cpumcf_zec12_pmu_event_attr;
+		break;
+	default:
+		model = NULL;
+		break;
+	};
+
+	if (!model)
+		goto out;
+
+	combined = merge_attr(cpumcf_pmu_event_attr, model);
+	if (combined)
+		cpumsf_pmu_events_group.attrs = combined;
+out:
+	return cpumsf_pmu_attr_groups;
+}
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 2343c218b8f9..4c1d336ce941 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -16,6 +16,7 @@
 #include <linux/kvm_host.h>
 #include <linux/percpu.h>
 #include <linux/export.h>
+#include <linux/sysfs.h>
 #include <asm/irq.h>
 #include <asm/cpu_mf.h>
 #include <asm/lowcore.h>
@@ -172,3 +173,14 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry,
 	__store_trace(entry, head, S390_lowcore.thread_info,
 		      S390_lowcore.thread_info + THREAD_SIZE);
 }
+
+/* Perf defintions for PMU event attributes in sysfs */
+ssize_t cpumf_events_sysfs_show(struct device *dev,
+				struct device_attribute *attr, char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+	return sprintf(page, "event=0x%04llx,name=%s\n",
+		       pmu_attr->id, attr->attr.name);
+}
-- 
cgit v1.2.3


From 8c069ff4bd6063a3f15e606c882e03f75c7e7711 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Thu, 12 Dec 2013 16:32:47 +0100
Subject: s390/perf: add support for the CPU-Measurement Sampling Facility

Introduce a perf PMU, "cpum_sf", to support the CPU-Measurement
Sampling Facility.  You can control the sampling facility through
this perf PMU interfaces.  Perf sampling events are created for
hardware samples.

For details about the CPU-Measurement Sampling Facility, see
"The Load-Program-Parameter and the CPU-Measurement Facilities" (SA23-2260).

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/cpu_mf.h     |   14 +
 arch/s390/include/asm/perf_event.h |   17 +-
 arch/s390/kernel/Makefile          |    2 +-
 arch/s390/kernel/perf_cpum_sf.c    | 1024 ++++++++++++++++++++++++++++++++++++
 arch/s390/kernel/perf_event.c      |   42 +-
 5 files changed, 1086 insertions(+), 13 deletions(-)
 create mode 100644 arch/s390/kernel/perf_cpum_sf.c

(limited to 'arch')

diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index f6dddeaad965..d707abc26157 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -210,6 +210,20 @@ static inline int lsctl(struct hws_lsctl_request_block *req)
 
 /* Sampling control helper functions */
 
+#include <linux/time.h>
+
+static inline unsigned long freq_to_sample_rate(struct hws_qsi_info_block *qsi,
+						unsigned long freq)
+{
+	return (USEC_PER_SEC / freq) * qsi->cpu_speed;
+}
+
+static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi,
+						unsigned long rate)
+{
+	return USEC_PER_SEC * qsi->cpu_speed / rate;
+}
+
 #define SDB_TE_ALERT_REQ_MASK	0x4000000000000000UL
 #define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL
 
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index 34185020ae0a..b4eea25f379e 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -15,12 +15,13 @@
 #include <linux/device.h>
 #include <asm/cpu_mf.h>
 
-/* CPU-measurement counter facility */
-#define PERF_CPUM_CF_MAX_CTR		256
-
 /* Per-CPU flags for PMU states */
 #define PMU_F_RESERVED			0x1000
 #define PMU_F_ENABLED			0x2000
+#define PMU_F_IN_USE			0x4000
+#define PMU_F_ERR_IBE			0x0100
+#define PMU_F_ERR_LSDA			0x0200
+#define PMU_F_ERR_MASK			(PMU_F_ERR_IBE|PMU_F_ERR_LSDA)
 
 /* Perf defintions for PMU event attributes in sysfs */
 extern __init const struct attribute_group **cpumf_cf_event_group(void);
@@ -41,5 +42,15 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 extern unsigned long perf_misc_flags(struct pt_regs *regs);
 #define perf_misc_flags(regs) perf_misc_flags(regs)
 
+/* Perf PMU definitions for the counter facility */
+#define PERF_CPUM_CF_MAX_CTR		256
+
+/* Perf PMU definitions for the sampling facility */
+#define PERF_CPUM_SF_MAX_CTR		1
+#define PERF_EVENT_CPUM_SF		0xB0000UL	/* Raw event ID */
+
+#define TEAR_REG(hwc)		((hwc)->last_tag)
+#define SAMPL_RATE(hwc)		((hwc)->event_base)
+
 #endif /* CONFIG_64BIT */
 #endif /* _ASM_S390_PERF_EVENT_H */
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 9f1e2adbd77e..1b3ac09c11b6 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -60,7 +60,7 @@ obj-$(CONFIG_FTRACE_SYSCALLS)  += ftrace.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 
 ifdef CONFIG_64BIT
-obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o perf_cpum_cf.o \
+obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o perf_cpum_cf.o perf_cpum_sf.o \
 						perf_cpum_cf_events.o
 obj-y				+= runtime_instr.o cache.o
 endif
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
new file mode 100644
index 000000000000..141eca0917f4
--- /dev/null
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -0,0 +1,1024 @@
+/*
+ * Performance event support for the System z CPU-measurement Sampling Facility
+ *
+ * Copyright IBM Corp. 2013
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#define KMSG_COMPONENT	"cpum_sf"
+#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/export.h>
+#include <asm/cpu_mf.h>
+#include <asm/irq.h>
+#include <asm/debug.h>
+#include <asm/timex.h>
+
+/* Minimum number of sample-data-block-tables:
+ * At least one table is required for the sampling buffer structure.
+ * A single table contains up to 511 pointers to sample-data-blocks.
+ */
+#define CPUM_SF_MIN_SDBT    1
+
+/* Minimum number of sample-data-blocks:
+ * The minimum designates a single page for sample-data-block, i.e.,
+ * up to 126 sample-data-blocks with a size of 32 bytes (bsdes).
+ */
+#define CPUM_SF_MIN_SDB	    126
+
+/* Maximum number of sample-data-blocks:
+ * The maximum number designates approx. 256K per CPU including
+ * the given number of sample-data-blocks and taking the number
+ * of sample-data-block tables into account.
+ *
+ * Later, this number can be increased for extending the sampling
+ * buffer, for example, by factor 2 (512K) or 4 (1M).
+ */
+#define CPUM_SF_MAX_SDB	    6471
+
+struct sf_buffer {
+	unsigned long	 sdbt;	    /* Sample-data-block-table origin */
+	/* buffer characteristics (required for buffer increments) */
+	unsigned long num_sdb;	    /* Number of sample-data-blocks */
+	unsigned long	 tail;	    /* last sample-data-block-table */
+};
+
+struct cpu_hw_sf {
+	/* CPU-measurement sampling information block */
+	struct hws_qsi_info_block qsi;
+	struct hws_lsctl_request_block lsctl;
+	struct sf_buffer sfb;	    /* Sampling buffer */
+	unsigned int flags;	    /* Status flags */
+	struct perf_event *event;   /* Scheduled perf event */
+};
+static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf);
+
+/* Debug feature */
+static debug_info_t *sfdbg;
+
+/*
+ * sf_buffer_available() - Check for an allocated sampling buffer
+ */
+static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
+{
+	return (cpuhw->sfb.sdbt) ? 1 : 0;
+}
+
+/*
+ * deallocate sampling facility buffer
+ */
+static void free_sampling_buffer(struct sf_buffer *sfb)
+{
+	unsigned long sdbt, *curr;
+
+	if (!sfb->sdbt)
+		return;
+
+	sdbt = sfb->sdbt;
+	curr = (unsigned long *) sdbt;
+
+	/* we'll free the SDBT after all SDBs are processed... */
+	while (1) {
+		if (!*curr || !sdbt)
+			break;
+
+		/* watch for link entry reset if found */
+		if (is_link_entry(curr)) {
+			curr = get_next_sdbt(curr);
+			if (sdbt)
+				free_page(sdbt);
+
+			/* we are done if we reach the origin */
+			if ((unsigned long) curr == sfb->sdbt)
+				break;
+			else
+				sdbt = (unsigned long) curr;
+		} else {
+			/* process SDB pointer */
+			if (*curr) {
+				free_page(*curr);
+				curr++;
+			}
+		}
+	}
+
+	debug_sprintf_event(sfdbg, 5,
+			    "free_sampling_buffer: freed sdbt=%0lx\n", sfb->sdbt);
+	memset(sfb, 0, sizeof(*sfb));
+}
+
+/*
+ * allocate_sampling_buffer() - allocate sampler memory
+ *
+ * Allocates and initializes a sampling buffer structure using the
+ * specified number of sample-data-blocks (SDB).  For each allocation,
+ * a 4K page is used.  The number of sample-data-block-tables (SDBT)
+ * are calculated from SDBs.
+ * Also set the ALERT_REQ mask in each SDBs trailer.
+ *
+ * Returns zero on success, non-zero otherwise.
+ */
+static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
+{
+	int j, k, rc;
+	unsigned long *sdbt, *tail, *trailer;
+	unsigned long sdb;
+	unsigned long num_sdbt, sdb_per_table;
+
+	if (sfb->sdbt)
+		return -EINVAL;
+	sfb->num_sdb = 0;
+
+	/* Compute the number of required sample-data-block-tables (SDBT) */
+	num_sdbt = num_sdb / ((PAGE_SIZE - 8) / 8);
+	if (num_sdbt < CPUM_SF_MIN_SDBT)
+		num_sdbt = CPUM_SF_MIN_SDBT;
+	sdb_per_table = (PAGE_SIZE - 8) / 8;
+
+	debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: num_sdbt=%lu "
+			    "num_sdb=%lu sdb_per_table=%lu\n",
+			    num_sdbt, num_sdb, sdb_per_table);
+	sdbt = NULL;
+	tail = sdbt;
+
+	for (j = 0; j < num_sdbt; j++) {
+		sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
+		if (!sdbt) {
+			rc = -ENOMEM;
+			goto allocate_sdbt_error;
+		}
+
+		/* save origin of sample-data-block-table */
+		if (!sfb->sdbt)
+			sfb->sdbt = (unsigned long) sdbt;
+
+		/* link current page to tail of chain */
+		if (tail)
+			*tail = (unsigned long)(void *) sdbt + 1;
+
+		for (k = 0; k < num_sdb && k < sdb_per_table; k++) {
+			/* get and set SDB page */
+			sdb = get_zeroed_page(GFP_KERNEL);
+			if (!sdb) {
+				rc = -ENOMEM;
+				goto allocate_sdbt_error;
+			}
+			*sdbt = sdb;
+			trailer = trailer_entry_ptr(*sdbt);
+			*trailer = SDB_TE_ALERT_REQ_MASK;
+			sdbt++;
+		}
+		num_sdb -= k;
+		sfb->num_sdb += k;	/* count allocated sdb's */
+		tail = sdbt;
+	}
+
+	rc = 0;
+	if (tail)
+		*tail = sfb->sdbt + 1;
+	sfb->tail = (unsigned long) (void *)tail;
+
+allocate_sdbt_error:
+	if (rc)
+		free_sampling_buffer(sfb);
+	else
+		debug_sprintf_event(sfdbg, 4,
+			"alloc_sampling_buffer: tear=%0lx dear=%0lx\n",
+			sfb->sdbt, *(unsigned long *) sfb->sdbt);
+	return rc;
+}
+
+static int allocate_sdbt(struct cpu_hw_sf *cpuhw, const struct hw_perf_event *hwc)
+{
+	unsigned long n_sdb, freq;
+	unsigned long factor;
+
+	/* Calculate sampling buffers using 4K pages
+	 *
+	 *    1. Use frequency as input.  The samping buffer is designed for
+	 *	 a complete second.  This can be adjusted through the "factor"
+	 *	 variable.
+	 *	 In any case, alloc_sampling_buffer() sets the Alert Request
+	 *	 Control indicator to trigger measurement-alert to harvest
+	 *	 sample-data-blocks (sdb).
+	 *
+	 *    2. Compute the number of sample-data-blocks and ensure a minimum
+	 *	 of CPUM_SF_MIN_SDB.  Also ensure the upper limit does not
+	 *	 exceed CPUM_SF_MAX_SDB.  See also the remarks for these
+	 *	 symbolic constants.
+	 *
+	 *    3. Compute number of pages used for the sample-data-block-table
+	 *	 and ensure a minimum of CPUM_SF_MIN_SDBT (at minimum one table
+	 *	 to manage up to 511 sample-data-blocks).
+	 */
+	freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
+	factor = 1;
+	n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / cpuhw->qsi.bsdes));
+	if (n_sdb < CPUM_SF_MIN_SDB)
+		n_sdb = CPUM_SF_MIN_SDB;
+
+	/* Return if there is already a sampling buffer allocated.
+	 * XXX Remove this later and check number of available and
+	 * required sdb's and, if necessary, increase the sampling buffer.
+	 */
+	if (sf_buffer_available(cpuhw))
+		return 0;
+
+	debug_sprintf_event(sfdbg, 3,
+			    "allocate_sdbt: rate=%lu f=%lu sdb=%lu/%i cpuhw=%p\n",
+			    SAMPL_RATE(hwc), freq, n_sdb, CPUM_SF_MAX_SDB, cpuhw);
+
+	return alloc_sampling_buffer(&cpuhw->sfb,
+			       min_t(unsigned long, n_sdb, CPUM_SF_MAX_SDB));
+}
+
+
+/* Number of perf events counting hardware events */
+static atomic_t num_events;
+/* Used to avoid races in calling reserve/release_cpumf_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+/*
+ * sf_disable() - Switch off sampling facility
+ */
+static int sf_disable(void)
+{
+	struct hws_lsctl_request_block sreq;
+
+	memset(&sreq, 0, sizeof(sreq));
+	return lsctl(&sreq);
+}
+
+
+#define PMC_INIT      0
+#define PMC_RELEASE   1
+static void setup_pmc_cpu(void *flags)
+{
+	int err;
+	struct cpu_hw_sf *cpusf = &__get_cpu_var(cpu_hw_sf);
+
+	/* XXX Improve error handling and pass a flag in the *flags
+	 *     variable to indicate failures.  Alternatively, ignore
+	 *     (print) errors here and let the PMU functions fail if
+	 *     the per-cpu PMU_F_RESERVED flag is not.
+	 */
+	err = 0;
+	switch (*((int *) flags)) {
+	case PMC_INIT:
+		memset(cpusf, 0, sizeof(*cpusf));
+		err = qsi(&cpusf->qsi);
+		if (err)
+			break;
+		cpusf->flags |= PMU_F_RESERVED;
+		err = sf_disable();
+		if (err)
+			pr_err("Switching off the sampling facility failed "
+			       "with rc=%i\n", err);
+		debug_sprintf_event(sfdbg, 5,
+				    "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf);
+		break;
+	case PMC_RELEASE:
+		cpusf->flags &= ~PMU_F_RESERVED;
+		err = sf_disable();
+		if (err) {
+			pr_err("Switching off the sampling facility failed "
+			       "with rc=%i\n", err);
+		} else {
+			if (cpusf->sfb.sdbt)
+				free_sampling_buffer(&cpusf->sfb);
+		}
+		debug_sprintf_event(sfdbg, 5,
+				    "setup_pmc_cpu: released: cpuhw=%p\n", cpusf);
+		break;
+	}
+}
+
+static void release_pmc_hardware(void)
+{
+	int flags = PMC_RELEASE;
+
+	irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+	on_each_cpu(setup_pmc_cpu, &flags, 1);
+}
+
+static int reserve_pmc_hardware(void)
+{
+	int flags = PMC_INIT;
+
+	on_each_cpu(setup_pmc_cpu, &flags, 1);
+	irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+
+	return 0;
+}
+
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+	/* Release PMC if this is the last perf event */
+	if (!atomic_add_unless(&num_events, -1, 1)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_dec_return(&num_events) == 0)
+			release_pmc_hardware();
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+}
+
+static void hw_init_period(struct hw_perf_event *hwc, u64 period)
+{
+	hwc->sample_period = period;
+	hwc->last_period = hwc->sample_period;
+	local64_set(&hwc->period_left, hwc->sample_period);
+}
+
+static void hw_reset_registers(struct hw_perf_event *hwc,
+			       unsigned long sdbt_origin)
+{
+	TEAR_REG(hwc) = sdbt_origin;	      /* (re)set to first sdb table */
+}
+
+static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
+				   unsigned long rate)
+{
+	if (rate < si->min_sampl_rate)
+		return si->min_sampl_rate;
+	if (rate > si->max_sampl_rate)
+		return si->max_sampl_rate;
+	return rate;
+}
+
+static int __hw_perf_event_init(struct perf_event *event)
+{
+	struct cpu_hw_sf *cpuhw;
+	struct hws_qsi_info_block si;
+	struct perf_event_attr *attr = &event->attr;
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned long rate;
+	int cpu, err;
+
+	/* Reserve CPU-measurement sampling facility */
+	err = 0;
+	if (!atomic_inc_not_zero(&num_events)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_read(&num_events) == 0 && reserve_pmc_hardware())
+			err = -EBUSY;
+		else
+			atomic_inc(&num_events);
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+	event->destroy = hw_perf_event_destroy;
+
+	if (err)
+		goto out;
+
+	/* Access per-CPU sampling information (query sampling info) */
+	/*
+	 * The event->cpu value can be -1 to count on every CPU, for example,
+	 * when attaching to a task.  If this is specified, use the query
+	 * sampling info from the current CPU, otherwise use event->cpu to
+	 * retrieve the per-CPU information.
+	 * Later, cpuhw indicates whether to allocate sampling buffers for a
+	 * particular CPU (cpuhw!=NULL) or each online CPU (cpuw==NULL).
+	 */
+	memset(&si, 0, sizeof(si));
+	cpuhw = NULL;
+	if (event->cpu == -1)
+		qsi(&si);
+	else {
+		/* Event is pinned to a particular CPU, retrieve the per-CPU
+		 * sampling structure for accessing the CPU-specific QSI.
+		 */
+		cpuhw = &per_cpu(cpu_hw_sf, event->cpu);
+		si = cpuhw->qsi;
+	}
+
+	/* Check sampling facility authorization and, if not authorized,
+	 * fall back to other PMUs.  It is safe to check any CPU because
+	 * the authorization is identical for all configured CPUs.
+	 */
+	if (!si.as) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	/* The sampling information (si) contains information about the
+	 * min/max sampling intervals and the CPU speed.  So calculate the
+	 * correct sampling interval and avoid the whole period adjust
+	 * feedback loop.
+	 */
+	rate = 0;
+	if (attr->freq) {
+		rate = freq_to_sample_rate(&si, attr->sample_freq);
+		rate = hw_limit_rate(&si, rate);
+		attr->freq = 0;
+		attr->sample_period = rate;
+	} else {
+		/* The min/max sampling rates specifies the valid range
+		 * of sample periods.  If the specified sample period is
+		 * out of range, limit the period to the range boundary.
+		 */
+		rate = hw_limit_rate(&si, hwc->sample_period);
+
+		/* The perf core maintains a maximum sample rate that is
+		 * configurable through the sysctl interface.  Ensure the
+		 * sampling rate does not exceed this value.  This also helps
+		 * to avoid throttling when pushing samples with
+		 * perf_event_overflow().
+		 */
+		if (sample_rate_to_freq(&si, rate) >
+		      sysctl_perf_event_sample_rate) {
+			err = -EINVAL;
+			debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n");
+			goto out;
+		}
+	}
+	SAMPL_RATE(hwc) = rate;
+	hw_init_period(hwc, SAMPL_RATE(hwc));
+
+	/* Allocate the per-CPU sampling buffer using the CPU information
+	 * from the event.  If the event is not pinned to a particular
+	 * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling
+	 * buffers for each online CPU.
+	 */
+	if (cpuhw)
+		/* Event is pinned to a particular CPU */
+		err = allocate_sdbt(cpuhw, hwc);
+	else {
+		/* Event is not pinned, allocate sampling buffer on
+		 * each online CPU
+		 */
+		for_each_online_cpu(cpu) {
+			cpuhw = &per_cpu(cpu_hw_sf, cpu);
+			err = allocate_sdbt(cpuhw, hwc);
+			if (err)
+				break;
+		}
+	}
+out:
+	return err;
+}
+
+static int cpumsf_pmu_event_init(struct perf_event *event)
+{
+	int err;
+
+	if (event->attr.type != PERF_TYPE_RAW)
+		return -ENOENT;
+
+	if (event->attr.config != PERF_EVENT_CPUM_SF)
+		return -ENOENT;
+
+	if (event->cpu >= nr_cpumask_bits ||
+	    (event->cpu >= 0 && !cpu_online(event->cpu)))
+		return -ENODEV;
+
+	err = __hw_perf_event_init(event);
+	if (unlikely(err))
+		if (event->destroy)
+			event->destroy(event);
+	return err;
+}
+
+static void cpumsf_pmu_enable(struct pmu *pmu)
+{
+	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+	int err;
+
+	if (cpuhw->flags & PMU_F_ENABLED)
+		return;
+
+	if (cpuhw->flags & PMU_F_ERR_MASK)
+		return;
+
+	cpuhw->flags |= PMU_F_ENABLED;
+	barrier();
+
+	err = lsctl(&cpuhw->lsctl);
+	if (err) {
+		cpuhw->flags &= ~PMU_F_ENABLED;
+		pr_err("Loading sampling controls failed: op=%i err=%i\n",
+			1, err);
+		return;
+	}
+
+	debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i tear=%p dear=%p\n",
+			    cpuhw->lsctl.es, cpuhw->lsctl.cs,
+			    (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear);
+}
+
+static void cpumsf_pmu_disable(struct pmu *pmu)
+{
+	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+	struct hws_lsctl_request_block inactive;
+	struct hws_qsi_info_block si;
+	int err;
+
+	if (!(cpuhw->flags & PMU_F_ENABLED))
+		return;
+
+	if (cpuhw->flags & PMU_F_ERR_MASK)
+		return;
+
+	/* Switch off sampling activation control */
+	inactive = cpuhw->lsctl;
+	inactive.cs = 0;
+
+	err = lsctl(&inactive);
+	if (err) {
+		pr_err("Loading sampling controls failed: op=%i err=%i\n",
+			2, err);
+		return;
+	}
+
+	/* Save state of TEAR and DEAR register contents */
+	if (!qsi(&si)) {
+		/* TEAR/DEAR values are valid only if the sampling facility is
+		 * enabled.  Note that cpumsf_pmu_disable() might be called even
+		 * for a disabled sampling facility because cpumsf_pmu_enable()
+		 * controls the enable/disable state.
+		 */
+		if (si.es) {
+			cpuhw->lsctl.tear = si.tear;
+			cpuhw->lsctl.dear = si.dear;
+		}
+	} else
+		debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: "
+				    "qsi() failed with err=%i\n", err);
+
+	cpuhw->flags &= ~PMU_F_ENABLED;
+}
+
+/* perf_push_sample() - Push samples to perf
+ * @event:	The perf event
+ * @sample:	Hardware sample data
+ *
+ * Use the hardware sample data to create perf event sample.  The sample
+ * is the pushed to the event subsystem and the function checks for
+ * possible event overflows.  If an event overflow occurs, the PMU is
+ * stopped.
+ *
+ * Return non-zero if an event overflow occurred.
+ */
+static int perf_push_sample(struct perf_event *event,
+			    struct hws_data_entry *sample)
+{
+	int overflow;
+	struct pt_regs regs;
+	struct perf_sample_data data;
+
+	/* Skip samples that are invalid or for which the instruction address
+	 * is not predictable.	For the latter, the wait-state bit is set.
+	 */
+	if (sample->I || sample->W)
+		return 0;
+
+	perf_sample_data_init(&data, 0, event->hw.last_period);
+
+	memset(&regs, 0, sizeof(regs));
+	regs.psw.addr = sample->ia;
+	if (sample->T)
+		regs.psw.mask |= PSW_MASK_DAT;
+	if (sample->W)
+		regs.psw.mask |= PSW_MASK_WAIT;
+	if (sample->P)
+		regs.psw.mask |= PSW_MASK_PSTATE;
+	switch (sample->AS) {
+	case 0x0:
+		regs.psw.mask |= PSW_ASC_PRIMARY;
+		break;
+	case 0x1:
+		regs.psw.mask |= PSW_ASC_ACCREG;
+		break;
+	case 0x2:
+		regs.psw.mask |= PSW_ASC_SECONDARY;
+		break;
+	case 0x3:
+		regs.psw.mask |= PSW_ASC_HOME;
+		break;
+	}
+
+	overflow = 0;
+	if (perf_event_overflow(event, &data, &regs)) {
+		overflow = 1;
+		event->pmu->stop(event, 0);
+		debug_sprintf_event(sfdbg, 4, "perf_push_sample: PMU stopped"
+				    " because of an event overflow\n");
+	}
+	perf_event_update_userpage(event);
+
+	return overflow;
+}
+
+static void perf_event_count_update(struct perf_event *event, u64 count)
+{
+	local64_add(count, &event->count);
+}
+
+/* hw_collect_samples() - Walk through a sample-data-block and collect samples
+ * @event:	The perf event
+ * @sdbt:	Sample-data-block table
+ * @overflow:	Event overflow counter
+ *
+ * Walks through a sample-data-block and collects hardware sample-data that is
+ * pushed to the perf event subsystem.	The overflow reports the number of
+ * samples that has been discarded due to an event overflow.
+ */
+static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
+			       unsigned long long *overflow)
+{
+	struct hws_data_entry *sample;
+	unsigned long *trailer;
+
+	trailer = trailer_entry_ptr(*sdbt);
+	sample = (struct hws_data_entry *) *sdbt;
+	while ((unsigned long *) sample < trailer) {
+		/* Check for an empty sample */
+		if (!sample->def)
+			break;
+
+		/* Update perf event period */
+		perf_event_count_update(event, SAMPL_RATE(&event->hw));
+
+		/* Check for basic sampling mode */
+		if (sample->def == 0x0001) {
+			/* If an event overflow occurred, the PMU is stopped to
+			 * throttle event delivery.  Remaining sample data is
+			 * discarded.
+			 */
+			if (!*overflow)
+				*overflow = perf_push_sample(event, sample);
+			else
+				/* Count discarded samples */
+				*overflow += 1;
+		} else
+			/* Sample slot is not yet written or other record */
+			debug_sprintf_event(sfdbg, 5, "hw_collect_samples: "
+					    "Unknown sample data entry format:"
+					    " %i\n", sample->def);
+
+		/* Reset sample slot and advance to next sample */
+		sample->def = 0;
+		sample++;
+	}
+}
+
+/* hw_perf_event_update() - Process sampling buffer
+ * @event:	The perf event
+ * @flush_all:	Flag to also flush partially filled sample-data-blocks
+ *
+ * Processes the sampling buffer and create perf event samples.
+ * The sampling buffer position are retrieved and saved in the TEAR_REG
+ * register of the specified perf event.
+ *
+ * Only full sample-data-blocks are processed.	Specify the flash_all flag
+ * to also walk through partially filled sample-data-blocks.
+ *
+ */
+static void hw_perf_event_update(struct perf_event *event, int flush_all)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct hws_trailer_entry *te;
+	unsigned long *sdbt;
+	unsigned long long event_overflow, sampl_overflow;
+	int done;
+
+	sdbt = (unsigned long *) TEAR_REG(hwc);
+	done = event_overflow = sampl_overflow = 0;
+	while (!done) {
+		/* Get the trailer entry of the sample-data-block */
+		te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
+
+		/* Leave loop if no more work to do (block full indicator) */
+		if (!te->f) {
+			done = 1;
+			if (!flush_all)
+				break;
+		}
+
+		/* Check sample overflow count */
+		if (te->overflow) {
+			/* Increment sample overflow counter */
+			sampl_overflow += te->overflow;
+
+			/* XXX: If an sample overflow occurs, increase the
+			 *	sampling buffer.  Set a "realloc" flag because
+			 *	the sampler must be re-enabled for changing
+			 *	the sample-data-block-table content.
+			 */
+		}
+
+		/* Timestamps are valid for full sample-data-blocks only */
+		debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p "
+				    "overflow=%llu timestamp=0x%llx\n",
+				    sdbt, te->overflow,
+				    (te->f) ? te->timestamp : 0ULL);
+
+		/* Collect all samples from a single sample-data-block and
+		 * flag if an (perf) event overflow happened.  If so, the PMU
+		 * is stopped and remaining samples will be discarded.
+		 */
+		hw_collect_samples(event, sdbt, &event_overflow);
+
+		/* Reset trailer */
+		xchg(&te->overflow, 0);
+		xchg((unsigned char *) te, 0x40);
+
+		/* Advance to next sample-data-block */
+		sdbt++;
+		if (is_link_entry(sdbt))
+			sdbt = get_next_sdbt(sdbt);
+
+		/* Update event hardware registers */
+		TEAR_REG(hwc) = (unsigned long) sdbt;
+
+		/* Stop processing sample-data if all samples of the current
+		 * sample-data-block were flushed even if it was not full.
+		 */
+		if (flush_all && done)
+			break;
+
+		/* If an event overflow happened, discard samples by
+		 * processing any remaining sample-data-blocks.
+		 */
+		if (event_overflow)
+			flush_all = 1;
+	}
+
+	if (sampl_overflow || event_overflow)
+		debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: "
+				    "overflow stats: sample=%llu event=%llu\n",
+				    sampl_overflow, event_overflow);
+}
+
+static void cpumsf_pmu_read(struct perf_event *event)
+{
+	/* Nothing to do ... updates are interrupt-driven */
+}
+
+/* Activate sampling control.
+ * Next call of pmu_enable() starts sampling.
+ */
+static void cpumsf_pmu_start(struct perf_event *event, int flags)
+{
+	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+
+	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+		return;
+
+	if (flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+
+	perf_pmu_disable(event->pmu);
+	event->hw.state = 0;
+	cpuhw->lsctl.cs = 1;
+	perf_pmu_enable(event->pmu);
+}
+
+/* Deactivate sampling control.
+ * Next call of pmu_enable() stops sampling.
+ */
+static void cpumsf_pmu_stop(struct perf_event *event, int flags)
+{
+	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+
+	if (event->hw.state & PERF_HES_STOPPED)
+		return;
+
+	perf_pmu_disable(event->pmu);
+	cpuhw->lsctl.cs = 0;
+	event->hw.state |= PERF_HES_STOPPED;
+
+	if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
+		hw_perf_event_update(event, 1);
+		event->hw.state |= PERF_HES_UPTODATE;
+	}
+	perf_pmu_enable(event->pmu);
+}
+
+static int cpumsf_pmu_add(struct perf_event *event, int flags)
+{
+	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+	int err;
+
+	if (cpuhw->flags & PMU_F_IN_USE)
+		return -EAGAIN;
+
+	if (!cpuhw->sfb.sdbt)
+		return -EINVAL;
+
+	err = 0;
+	perf_pmu_disable(event->pmu);
+
+	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+	/* Set up sampling controls.  Always program the sampling register
+	 * using the SDB-table start.  Reset TEAR_REG event hardware register
+	 * that is used by hw_perf_event_update() to store the sampling buffer
+	 * position after samples have been flushed.
+	 */
+	cpuhw->lsctl.s = 0;
+	cpuhw->lsctl.h = 1;
+	cpuhw->lsctl.tear = cpuhw->sfb.sdbt;
+	cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
+	cpuhw->lsctl.interval = SAMPL_RATE(&event->hw);
+	hw_reset_registers(&event->hw, cpuhw->sfb.sdbt);
+
+	/* Ensure sampling functions are in the disabled state.  If disabled,
+	 * switch on sampling enable control. */
+	if (WARN_ON_ONCE(cpuhw->lsctl.es == 1)) {
+		err = -EAGAIN;
+		goto out;
+	}
+	cpuhw->lsctl.es = 1;
+
+	/* Set in_use flag and store event */
+	event->hw.idx = 0;	  /* only one sampling event per CPU supported */
+	cpuhw->event = event;
+	cpuhw->flags |= PMU_F_IN_USE;
+
+	if (flags & PERF_EF_START)
+		cpumsf_pmu_start(event, PERF_EF_RELOAD);
+out:
+	perf_event_update_userpage(event);
+	perf_pmu_enable(event->pmu);
+	return err;
+}
+
+static void cpumsf_pmu_del(struct perf_event *event, int flags)
+{
+	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+
+	perf_pmu_disable(event->pmu);
+	cpumsf_pmu_stop(event, PERF_EF_UPDATE);
+
+	cpuhw->lsctl.es = 0;
+	cpuhw->flags &= ~PMU_F_IN_USE;
+	cpuhw->event = NULL;
+
+	perf_event_update_userpage(event);
+	perf_pmu_enable(event->pmu);
+}
+
+static int cpumsf_pmu_event_idx(struct perf_event *event)
+{
+	return event->hw.idx;
+}
+
+CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF);
+
+static struct attribute *cpumsf_pmu_events_attr[] = {
+	CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC),
+	NULL,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *cpumsf_pmu_format_attr[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute_group cpumsf_pmu_events_group = {
+	.name = "events",
+	.attrs = cpumsf_pmu_events_attr,
+};
+static struct attribute_group cpumsf_pmu_format_group = {
+	.name = "format",
+	.attrs = cpumsf_pmu_format_attr,
+};
+static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
+	&cpumsf_pmu_events_group,
+	&cpumsf_pmu_format_group,
+	NULL,
+};
+
+static struct pmu cpumf_sampling = {
+	.pmu_enable   = cpumsf_pmu_enable,
+	.pmu_disable  = cpumsf_pmu_disable,
+
+	.event_init   = cpumsf_pmu_event_init,
+	.add	      = cpumsf_pmu_add,
+	.del	      = cpumsf_pmu_del,
+
+	.start	      = cpumsf_pmu_start,
+	.stop	      = cpumsf_pmu_stop,
+	.read	      = cpumsf_pmu_read,
+
+	.event_idx    = cpumsf_pmu_event_idx,
+	.attr_groups  = cpumsf_pmu_attr_groups,
+};
+
+static void cpumf_measurement_alert(struct ext_code ext_code,
+				    unsigned int alert, unsigned long unused)
+{
+	struct cpu_hw_sf *cpuhw;
+
+	if (!(alert & CPU_MF_INT_SF_MASK))
+		return;
+	inc_irq_stat(IRQEXT_CMS);
+	cpuhw = &__get_cpu_var(cpu_hw_sf);
+
+	/* Measurement alerts are shared and might happen when the PMU
+	 * is not reserved.  Ignore these alerts in this case. */
+	if (!(cpuhw->flags & PMU_F_RESERVED))
+		return;
+
+	/* The processing below must take care of multiple alert events that
+	 * might be indicated concurrently. */
+
+	/* Program alert request */
+	if (alert & CPU_MF_INT_SF_PRA) {
+		if (cpuhw->flags & PMU_F_IN_USE)
+			hw_perf_event_update(cpuhw->event, 0);
+		else
+			WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE));
+	}
+
+	/* Report measurement alerts only for non-PRA codes */
+	if (alert != CPU_MF_INT_SF_PRA)
+		debug_sprintf_event(sfdbg, 6, "measurement alert: 0x%x\n", alert);
+
+	/* Sampling authorization change request */
+	if (alert & CPU_MF_INT_SF_SACA)
+		qsi(&cpuhw->qsi);
+
+	/* Loss of sample data due to high-priority machine activities */
+	if (alert & CPU_MF_INT_SF_LSDA) {
+		pr_err("Sample data was lost\n");
+		cpuhw->flags |= PMU_F_ERR_LSDA;
+		sf_disable();
+	}
+
+	/* Invalid sampling buffer entry */
+	if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) {
+		pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n",
+		       alert);
+		cpuhw->flags |= PMU_F_ERR_IBE;
+		sf_disable();
+	}
+}
+
+static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self,
+					unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (long) hcpu;
+	int flags;
+
+	/* Ignore the notification if no events are scheduled on the PMU.
+	 * This might be racy...
+	 */
+	if (!atomic_read(&num_events))
+		return NOTIFY_OK;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		flags = PMC_INIT;
+		smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
+		break;
+	case CPU_DOWN_PREPARE:
+		flags = PMC_RELEASE;
+		smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
+		break;
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static int __init init_cpum_sampling_pmu(void)
+{
+	int err;
+
+	if (!cpum_sf_avail())
+		return -ENODEV;
+
+	sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80);
+	if (!sfdbg)
+		pr_err("Registering for s390dbf failed\n");
+	debug_register_view(sfdbg, &debug_sprintf_view);
+
+	err = register_external_interrupt(0x1407, cpumf_measurement_alert);
+	if (err) {
+		pr_err("Failed to register for CPU-measurement alerts\n");
+		goto out;
+	}
+
+	err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW);
+	if (err) {
+		pr_err("Failed to register cpum_sf pmu\n");
+		unregister_external_interrupt(0x1407, cpumf_measurement_alert);
+		goto out;
+	}
+	perf_cpu_notifier(cpumf_pmu_notifier);
+out:
+	return err;
+}
+arch_initcall(init_cpum_sampling_pmu);
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 4c1d336ce941..b9843ba9829f 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -16,6 +16,7 @@
 #include <linux/kvm_host.h>
 #include <linux/percpu.h>
 #include <linux/export.h>
+#include <linux/spinlock.h>
 #include <linux/sysfs.h>
 #include <asm/irq.h>
 #include <asm/cpu_mf.h>
@@ -36,6 +37,8 @@ int perf_num_counters(void)
 
 	if (cpum_cf_avail())
 		num += PERF_CPUM_CF_MAX_CTR;
+	if (cpum_sf_avail())
+		num += PERF_CPUM_SF_MAX_CTR;
 
 	return num;
 }
@@ -93,24 +96,45 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
 			       : PERF_RECORD_MISC_KERNEL;
 }
 
-void perf_event_print_debug(void)
+void print_debug_cf(void)
 {
 	struct cpumf_ctr_info cf_info;
-	unsigned long flags;
-	int cpu;
-
-	if (!cpum_cf_avail())
-		return;
-
-	local_irq_save(flags);
+	int cpu = smp_processor_id();
 
-	cpu = smp_processor_id();
 	memset(&cf_info, 0, sizeof(cf_info));
 	if (!qctri(&cf_info))
 		pr_info("CPU[%i] CPUM_CF: ver=%u.%u A=%04x E=%04x C=%04x\n",
 			cpu, cf_info.cfvn, cf_info.csvn,
 			cf_info.auth_ctl, cf_info.enable_ctl, cf_info.act_ctl);
+}
+
+static void print_debug_sf(void)
+{
+	struct hws_qsi_info_block si;
+	int cpu = smp_processor_id();
+
+	memset(&si, 0, sizeof(si));
+	if (qsi(&si)) {
+		pr_err("CPU[%i]: CPM_SF: qsi failed\n");
+		return;
+	}
+
+	pr_info("CPU[%i]: CPM_SF: as=%i es=%i cs=%i bsdes=%i dsdes=%i"
+		" min=%i max=%i cpu_speed=%i tear=%p dear=%p\n",
+		cpu, si.as, si.es, si.cs, si.bsdes, si.dsdes,
+		si.min_sampl_rate, si.max_sampl_rate, si.cpu_speed,
+		si.tear, si.dear);
+}
+
+void perf_event_print_debug(void)
+{
+	unsigned long flags;
 
+	local_irq_save(flags);
+	if (cpum_cf_avail())
+		print_debug_cf();
+	if (cpum_sf_avail())
+		print_debug_sf();
 	local_irq_restore(flags);
 }
 
-- 
cgit v1.2.3


From 55baa2f831ae4a41da9617ab9e7cef5ebc991ec9 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Thu, 12 Dec 2013 16:47:00 +0100
Subject: s390/perf: Improve PMU selection for PERF_COUNT_HW_CPU_CYCLES events

The cpum_cf (counter facility) PMU does not support sampling events.
With cpum_sf (sampling facility), a PMU for sampling CPU cycles is
available.

Make cpum_sf the "default" PMU for PERF_COUNT_HW_CPU_CYCLES sampling
events but use the more precise cpum_cf PMU for non-sampling events.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/perf_cpum_sf.c | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 141eca0917f4..52bf36ee91aa 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -468,11 +468,29 @@ static int cpumsf_pmu_event_init(struct perf_event *event)
 {
 	int err;
 
-	if (event->attr.type != PERF_TYPE_RAW)
-		return -ENOENT;
-
-	if (event->attr.config != PERF_EVENT_CPUM_SF)
+	/* No support for taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	switch (event->attr.type) {
+	case PERF_TYPE_RAW:
+		if (event->attr.config != PERF_EVENT_CPUM_SF)
+			return -ENOENT;
+		break;
+	case PERF_TYPE_HARDWARE:
+		/* Support sampling of CPU cycles in addition to the
+		 * counter facility.  However, the counter facility
+		 * is more precise and, hence, restrict this PMU to
+		 * sampling events only.
+		 */
+		if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES)
+			return -ENOENT;
+		if (!is_sampling_event(event))
+			return -ENOENT;
+		break;
+	default:
 		return -ENOENT;
+	}
 
 	if (event->cpu >= nr_cpumask_bits ||
 	    (event->cpu >= 0 && !cpu_online(event->cpu)))
-- 
cgit v1.2.3


From e28bb79d9935293a8eea5f3c771fde89db645ba7 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Thu, 12 Dec 2013 16:52:48 +0100
Subject: s390/perf,oprofile: Share sampling facility

Introduce reserve/release functions to share the sampling facility
between perf and oprofile.
Also improve error handling for the sampling facility support in perf.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/perf_event.h |  4 ++++
 arch/s390/kernel/perf_cpum_sf.c    | 17 ++++++++++++-----
 arch/s390/kernel/perf_event.c      | 30 ++++++++++++++++++++++++++++++
 arch/s390/oprofile/hwsampler.c     |  7 +++++++
 arch/s390/oprofile/init.c          | 23 ++++++++++++++++++++---
 5 files changed, 73 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index b4eea25f379e..23d2dfa8201d 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -52,5 +52,9 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
 #define TEAR_REG(hwc)		((hwc)->last_tag)
 #define SAMPL_RATE(hwc)		((hwc)->event_base)
 
+/* Perf hardware reserve and release functions */
+int perf_reserve_sampling(void);
+void perf_release_sampling(void);
+
 #endif /* CONFIG_64BIT */
 #endif /* _ASM_S390_PERF_EVENT_H */
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 52bf36ee91aa..ae5e0192160d 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -260,16 +260,12 @@ static int sf_disable(void)
 
 #define PMC_INIT      0
 #define PMC_RELEASE   1
+#define PMC_FAILURE   2
 static void setup_pmc_cpu(void *flags)
 {
 	int err;
 	struct cpu_hw_sf *cpusf = &__get_cpu_var(cpu_hw_sf);
 
-	/* XXX Improve error handling and pass a flag in the *flags
-	 *     variable to indicate failures.  Alternatively, ignore
-	 *     (print) errors here and let the PMU functions fail if
-	 *     the per-cpu PMU_F_RESERVED flag is not.
-	 */
 	err = 0;
 	switch (*((int *) flags)) {
 	case PMC_INIT:
@@ -299,6 +295,8 @@ static void setup_pmc_cpu(void *flags)
 				    "setup_pmc_cpu: released: cpuhw=%p\n", cpusf);
 		break;
 	}
+	if (err)
+		*((int *) flags) |= PMC_FAILURE;
 }
 
 static void release_pmc_hardware(void)
@@ -307,13 +305,22 @@ static void release_pmc_hardware(void)
 
 	irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
 	on_each_cpu(setup_pmc_cpu, &flags, 1);
+	perf_release_sampling();
 }
 
 static int reserve_pmc_hardware(void)
 {
 	int flags = PMC_INIT;
+	int err;
 
+	err = perf_reserve_sampling();
+	if (err)
+		return err;
 	on_each_cpu(setup_pmc_cpu, &flags, 1);
+	if (flags & PMC_FAILURE) {
+		release_pmc_hardware();
+		return -ENODEV;
+	}
 	irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
 
 	return 0;
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index b9843ba9829f..4edcdfa4894e 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -208,3 +208,33 @@ ssize_t cpumf_events_sysfs_show(struct device *dev,
 	return sprintf(page, "event=0x%04llx,name=%s\n",
 		       pmu_attr->id, attr->attr.name);
 }
+
+/* Reserve/release functions for sharing perf hardware */
+static DEFINE_SPINLOCK(perf_hw_owner_lock);
+static void *perf_sampling_owner;
+
+int perf_reserve_sampling(void)
+{
+	int err;
+
+	err = 0;
+	spin_lock(&perf_hw_owner_lock);
+	if (perf_sampling_owner) {
+		pr_warn("The sampling facility is already reserved by %p\n",
+			perf_sampling_owner);
+		err = -EBUSY;
+	} else
+		perf_sampling_owner = __builtin_return_address(0);
+	spin_unlock(&perf_hw_owner_lock);
+	return err;
+}
+EXPORT_SYMBOL(perf_reserve_sampling);
+
+void perf_release_sampling(void)
+{
+	spin_lock(&perf_hw_owner_lock);
+	WARN_ON(!perf_sampling_owner);
+	perf_sampling_owner = NULL;
+	spin_unlock(&perf_hw_owner_lock);
+}
+EXPORT_SYMBOL(perf_release_sampling);
diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c
index bbca76ad6e1b..eb095874540d 100644
--- a/arch/s390/oprofile/hwsampler.c
+++ b/arch/s390/oprofile/hwsampler.c
@@ -41,6 +41,7 @@ static DEFINE_MUTEX(hws_sem_oom);
 
 static unsigned char hws_flush_all;
 static unsigned int hws_oom;
+static unsigned int hws_alert;
 static struct workqueue_struct *hws_wq;
 
 static unsigned int hws_state;
@@ -182,6 +183,9 @@ static void hws_ext_handler(struct ext_code ext_code,
 	if (!(param32 & CPU_MF_INT_SF_MASK))
 		return;
 
+	if (!hws_alert)
+		return;
+
 	inc_irq_stat(IRQEXT_CMS);
 	atomic_xchg(&cb->ext_params, atomic_read(&cb->ext_params) | param32);
 
@@ -941,6 +945,7 @@ int hwsampler_deallocate(void)
 		goto deallocate_exit;
 
 	irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+	hws_alert = 0;
 	deallocate_sdbt();
 
 	hws_state = HWS_DEALLOCATED;
@@ -1055,6 +1060,7 @@ int hwsampler_shutdown(void)
 
 		if (hws_state == HWS_STOPPED) {
 			irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+			hws_alert = 0;
 			deallocate_sdbt();
 		}
 		if (hws_wq) {
@@ -1129,6 +1135,7 @@ start_all_exit:
 	hws_oom = 1;
 	hws_flush_all = 0;
 	/* now let them in, 1407 CPUMF external interrupts */
+	hws_alert = 1;
 	irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
 
 	return 0;
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index 04e1b6a85362..9ffe645d5989 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -10,6 +10,7 @@
  */
 
 #include <linux/oprofile.h>
+#include <linux/perf_event.h>
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
@@ -67,6 +68,21 @@ module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
 MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling"
 		           "(report cpu_type \"timer\"");
 
+static int __oprofile_hwsampler_start(void)
+{
+	int retval;
+
+	retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks);
+	if (retval)
+		return retval;
+
+	retval = hwsampler_start_all(oprofile_hw_interval);
+	if (retval)
+		hwsampler_deallocate();
+
+	return retval;
+}
+
 static int oprofile_hwsampler_start(void)
 {
 	int retval;
@@ -76,13 +92,13 @@ static int oprofile_hwsampler_start(void)
 	if (!hwsampler_running)
 		return timer_ops.start();
 
-	retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks);
+	retval = perf_reserve_sampling();
 	if (retval)
 		return retval;
 
-	retval = hwsampler_start_all(oprofile_hw_interval);
+	retval = __oprofile_hwsampler_start();
 	if (retval)
-		hwsampler_deallocate();
+		perf_release_sampling();
 
 	return retval;
 }
@@ -96,6 +112,7 @@ static void oprofile_hwsampler_stop(void)
 
 	hwsampler_stop_all();
 	hwsampler_deallocate();
+	perf_release_sampling();
 	return;
 }
 
-- 
cgit v1.2.3


From 99c64b6679c41d8238b154c1a462724d7101765c Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Fri, 13 Dec 2013 12:38:39 +0100
Subject: s390/perf: Add service level information for CPU-Measurement
 Facilities

Register a service level handler to report information about available
CPU-Measurement facilities.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/perf_event.c | 58 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 4edcdfa4894e..3bd2bf030ad4 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -16,17 +16,19 @@
 #include <linux/kvm_host.h>
 #include <linux/percpu.h>
 #include <linux/export.h>
+#include <linux/seq_file.h>
 #include <linux/spinlock.h>
 #include <linux/sysfs.h>
 #include <asm/irq.h>
 #include <asm/cpu_mf.h>
 #include <asm/lowcore.h>
 #include <asm/processor.h>
+#include <asm/sysinfo.h>
 
 const char *perf_pmu_name(void)
 {
 	if (cpum_cf_avail() || cpum_sf_avail())
-		return "CPU-measurement facilities (CPUMF)";
+		return "CPU-Measurement Facilities (CPU-MF)";
 	return "pmu";
 }
 EXPORT_SYMBOL(perf_pmu_name);
@@ -138,6 +140,60 @@ void perf_event_print_debug(void)
 	local_irq_restore(flags);
 }
 
+/* Service level infrastructure */
+static void sl_print_counter(struct seq_file *m)
+{
+	struct cpumf_ctr_info ci;
+
+	memset(&ci, 0, sizeof(ci));
+	if (qctri(&ci))
+		return;
+
+	seq_printf(m, "CPU-MF: Counter facility: version=%u.%u "
+		   "authorization=%04x\n", ci.cfvn, ci.csvn, ci.auth_ctl);
+}
+
+static void sl_print_sampling(struct seq_file *m)
+{
+	struct hws_qsi_info_block si;
+
+	memset(&si, 0, sizeof(si));
+	if (qsi(&si))
+		return;
+
+	if (!si.as && !si.ad)
+		return;
+
+	seq_printf(m, "CPU-MF: Sampling facility: min_rate=%lu max_rate=%lu"
+		   " cpu_speed=%u\n", si.min_sampl_rate, si.max_sampl_rate,
+		   si.cpu_speed);
+	if (si.as)
+		seq_printf(m, "CPU-MF: Sampling facility: mode=basic"
+			   " sample_size=%u\n", si.bsdes);
+	if (si.ad)
+		seq_printf(m, "CPU-MF: Sampling facility: mode=diagnostic"
+			   " sample_size=%u\n", si.dsdes);
+}
+
+static void service_level_perf_print(struct seq_file *m,
+				     struct service_level *sl)
+{
+	if (cpum_cf_avail())
+		sl_print_counter(m);
+	if (cpum_sf_avail())
+		sl_print_sampling(m);
+}
+
+static struct service_level service_level_perf = {
+	.seq_print = service_level_perf_print,
+};
+
+static int __init service_level_perf_register(void)
+{
+	return register_service_level(&service_level_perf);
+}
+arch_initcall(service_level_perf_register);
+
 /* See also arch/s390/kernel/traps.c */
 static unsigned long __store_trace(struct perf_callchain_entry *entry,
 				   unsigned long sp,
-- 
cgit v1.2.3


From aa3b7c296732b4351dfdbfe70be6b38a0882be14 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Thu, 12 Dec 2013 17:48:32 +0100
Subject: s390/pci: prevent inadvertently triggered bus scans

Initialization and scanning of the pci bus is omitted on older
machines without pci support or if pci=off was specified. Remember
the fact that we ran without pci support and prevent further bus
scans during resume from hibernate or after receiving hotplug
notifications.

Reported-by: Stefan Haberland <stefan.haberland@de.ibm.com>
Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/pci.h |  1 +
 arch/s390/pci/pci.c         | 10 +++++++++-
 arch/s390/pci/pci_event.c   | 18 ++++++++++++++----
 3 files changed, 24 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index c129ab2ac731..2583466f576b 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -144,6 +144,7 @@ int clp_disable_fh(struct zpci_dev *);
 void zpci_event_error(void *);
 void zpci_event_availability(void *);
 void zpci_rescan(void);
+bool zpci_is_enabled(void);
 #else /* CONFIG_PCI */
 static inline void zpci_event_error(void *e) {}
 static inline void zpci_event_availability(void *e) {}
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index bf7c73d71eef..e3265b50f3ae 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -920,6 +920,7 @@ static void zpci_mem_exit(void)
 }
 
 static unsigned int s390_pci_probe;
+static unsigned int s390_pci_initialized;
 
 char * __init pcibios_setup(char *str)
 {
@@ -930,6 +931,11 @@ char * __init pcibios_setup(char *str)
 	return str;
 }
 
+bool zpci_is_enabled(void)
+{
+	return s390_pci_initialized;
+}
+
 static int __init pci_base_init(void)
 {
 	int rc;
@@ -961,6 +967,7 @@ static int __init pci_base_init(void)
 	if (rc)
 		goto out_find;
 
+	s390_pci_initialized = 1;
 	return 0;
 
 out_find:
@@ -978,5 +985,6 @@ subsys_initcall_sync(pci_base_init);
 
 void zpci_rescan(void)
 {
-	clp_rescan_pci_devices_simple();
+	if (zpci_is_enabled())
+		clp_rescan_pci_devices_simple();
 }
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 800f064b0da7..228787a3630a 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -43,9 +43,8 @@ struct zpci_ccdf_avail {
 	u16 pec;			/* PCI event code */
 } __packed;
 
-void zpci_event_error(void *data)
+static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
 {
-	struct zpci_ccdf_err *ccdf = data;
 	struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
 
 	zpci_err("error CCDF:\n");
@@ -58,9 +57,14 @@ void zpci_event_error(void *data)
 	       pci_name(zdev->pdev), ccdf->pec, ccdf->fid);
 }
 
-void zpci_event_availability(void *data)
+void zpci_event_error(void *data)
+{
+	if (zpci_is_enabled())
+		__zpci_event_error(data);
+}
+
+static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 {
-	struct zpci_ccdf_avail *ccdf = data;
 	struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
 	struct pci_dev *pdev = zdev ? zdev->pdev : NULL;
 	int ret;
@@ -115,3 +119,9 @@ void zpci_event_availability(void *data)
 		break;
 	}
 }
+
+void zpci_event_availability(void *data)
+{
+	if (zpci_is_enabled())
+		__zpci_event_availability(data);
+}
-- 
cgit v1.2.3


From 704268925d32a0457202371a61580af76b94c53a Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Thu, 12 Dec 2013 17:50:53 +0100
Subject: s390/pci: fix removal of nonexistent pci bus

If we remove a pci bus after receiving a hotplug notification we need
to check if the bus is actually present (creation of the pci bus
during an earlier notification may have been failed).

Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/pci/pci_event.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 228787a3630a..65ea105f68a1 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -112,6 +112,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 		clp_rescan_pci_devices();
 		break;
 	case 0x0308: /* Standby -> Reserved */
+		if (!zdev)
+			break;
 		pci_stop_root_bus(zdev->bus);
 		pci_remove_root_bus(zdev->bus);
 		break;
-- 
cgit v1.2.3


From 0c0c2776926018e7560b99e921467aea1115d03b Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Thu, 12 Dec 2013 17:53:11 +0100
Subject: s390/pci: set error state for unavailable functions

If we receive a notification that a pci function became unavailable we clean
up by removing the pci device. This can confuse the driver since the function
is already unaccessible. Improve this situation by setting an appropriate
error_state.

Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/pci/pci_event.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 65ea105f68a1..7fc4c2c5708c 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -102,8 +102,12 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 
 		break;
 	case 0x0304: /* Configured -> Standby */
-		if (pdev)
+		if (pdev) {
+			/* Give the driver a hint that the function is
+			 * already unusable. */
+			pdev->error_state = pci_channel_io_perm_failure;
 			pci_stop_and_remove_bus_device(pdev);
+		}
 
 		zpci_disable_device(zdev);
 		zdev->state = ZPCI_FN_STATE_STANDBY;
-- 
cgit v1.2.3


From f7038b7c3f4924b18390c51c1ec1e49287cc87db Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Thu, 12 Dec 2013 17:53:59 +0100
Subject: s390/pci/dma: fix accounting of allocated_pages

allocated_pages sometimes are increased even if s390_dma_alloc fails
also this value is never decreased even if s390_dma_free is called.
This patch fixes these bugs.
Also remove the atomic64_t casts (the members are already of this type).

Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/pci/pci_dma.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 9b83d080902d..60c11a629d96 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -285,7 +285,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
 		flags |= ZPCI_TABLE_PROTECTED;
 
 	if (!dma_update_trans(zdev, pa, dma_addr, size, flags)) {
-		atomic64_add(nr_pages, (atomic64_t *) &zdev->fmb->mapped_pages);
+		atomic64_add(nr_pages, &zdev->fmb->mapped_pages);
 		return dma_addr + (offset & ~PAGE_MASK);
 	}
 
@@ -313,7 +313,7 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
 		zpci_err_hex(&dma_addr, sizeof(dma_addr));
 	}
 
-	atomic64_add(npages, (atomic64_t *) &zdev->fmb->unmapped_pages);
+	atomic64_add(npages, &zdev->fmb->unmapped_pages);
 	iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
 	dma_free_iommu(zdev, iommu_page_index, npages);
 }
@@ -332,7 +332,6 @@ static void *s390_dma_alloc(struct device *dev, size_t size,
 	if (!page)
 		return NULL;
 
-	atomic64_add(size / PAGE_SIZE, (atomic64_t *) &zdev->fmb->allocated_pages);
 	pa = page_to_phys(page);
 	memset((void *) pa, 0, size);
 
@@ -343,6 +342,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size,
 		return NULL;
 	}
 
+	atomic64_add(size / PAGE_SIZE, &zdev->fmb->allocated_pages);
 	if (dma_handle)
 		*dma_handle = map;
 	return (void *) pa;
@@ -352,8 +352,11 @@ static void s390_dma_free(struct device *dev, size_t size,
 			  void *pa, dma_addr_t dma_handle,
 			  struct dma_attrs *attrs)
 {
-	s390_dma_unmap_pages(dev, dma_handle, PAGE_ALIGN(size),
-			     DMA_BIDIRECTIONAL, NULL);
+	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+
+	size = PAGE_ALIGN(size);
+	atomic64_sub(size / PAGE_SIZE, &zdev->fmb->allocated_pages);
+	s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
 	free_pages((unsigned long) pa, get_order(size));
 }
 
-- 
cgit v1.2.3


From 257608fb4112b4cabefd9e33a4fc2df6b64dca6a Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Thu, 12 Dec 2013 17:55:22 +0100
Subject: s390/pci: reenable per default

HW, FW and Linux support is in a better shape now - let's reenable
pci bus probing per default.

Acked-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/pci/pci.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index e3265b50f3ae..0820362c7b0f 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -919,13 +919,13 @@ static void zpci_mem_exit(void)
 	kmem_cache_destroy(zdev_fmb_cache);
 }
 
-static unsigned int s390_pci_probe;
+static unsigned int s390_pci_probe = 1;
 static unsigned int s390_pci_initialized;
 
 char * __init pcibios_setup(char *str)
 {
-	if (!strcmp(str, "on")) {
-		s390_pci_probe = 1;
+	if (!strcmp(str, "off")) {
+		s390_pci_probe = 0;
 		return NULL;
 	}
 	return str;
-- 
cgit v1.2.3


From 69f239ed335a4b03265cae3ca930f3f166e42e35 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Thu, 12 Dec 2013 17:03:48 +0100
Subject: s390/cpum_sf: Dynamically extend the sampling buffer if overflows
 occur

Improve the sampling buffer allocation and add a function to reallocate and
increase the sampling buffer structure.  The number of allocated buffer elements
(sample-data-blocks) are accounted.  You can control the minimum and maximum
number these sample-data-blocks through the cpum_sfb_size kernel parameter.

The number hardware sample overflows (if any) are also accounted and stored
per perf event.  During the PMU disable/enable calls, the accumulated overflow
counter is analyzed and, if necessary, the sampling buffer is dynamically
increased.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/perf_event.h |   4 +
 arch/s390/kernel/perf_cpum_sf.c    | 527 ++++++++++++++++++++++++++++---------
 2 files changed, 411 insertions(+), 120 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index 23d2dfa8201d..99d7f4e333c2 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -49,6 +49,10 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
 #define PERF_CPUM_SF_MAX_CTR		1
 #define PERF_EVENT_CPUM_SF		0xB0000UL	/* Raw event ID */
 
+#define REG_NONE		0
+#define REG_OVERFLOW		1
+#define OVERFLOW_REG(hwc)	((hwc)->extra_reg.config)
+#define SFB_ALLOC_REG(hwc)	((hwc)->extra_reg.alloc)
 #define TEAR_REG(hwc)		((hwc)->last_tag)
 #define SAMPL_RATE(hwc)		((hwc)->event_base)
 
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index ae5e0192160d..ea1656073dac 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -17,6 +17,8 @@
 #include <linux/percpu.h>
 #include <linux/notifier.h>
 #include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/moduleparam.h>
 #include <asm/cpu_mf.h>
 #include <asm/irq.h>
 #include <asm/debug.h>
@@ -26,34 +28,54 @@
  * At least one table is required for the sampling buffer structure.
  * A single table contains up to 511 pointers to sample-data-blocks.
  */
-#define CPUM_SF_MIN_SDBT    1
+#define CPUM_SF_MIN_SDBT	1
 
-/* Minimum number of sample-data-blocks:
- * The minimum designates a single page for sample-data-block, i.e.,
- * up to 126 sample-data-blocks with a size of 32 bytes (bsdes).
+/* Number of sample-data-blocks per sample-data-block-table (SDBT):
+ * The table contains SDB origin (8 bytes) and one SDBT origin that
+ * points to the next table.
  */
-#define CPUM_SF_MIN_SDB	    126
+#define CPUM_SF_SDB_PER_TABLE	((PAGE_SIZE - 8) / 8)
 
-/* Maximum number of sample-data-blocks:
- * The maximum number designates approx. 256K per CPU including
- * the given number of sample-data-blocks and taking the number
- * of sample-data-block tables into account.
+/* Maximum page offset for an SDBT table-link entry:
+ * If this page offset is reached, a table-link entry to the next SDBT
+ * must be added.
+ */
+#define CPUM_SF_SDBT_TL_OFFSET	(CPUM_SF_SDB_PER_TABLE * 8)
+static inline int require_table_link(const void *sdbt)
+{
+	return ((unsigned long) sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET;
+}
+
+/* Minimum and maximum sampling buffer sizes:
+ *
+ * This number represents the maximum size of the sampling buffer
+ * taking the number of sample-data-block-tables into account.
  *
- * Later, this number can be increased for extending the sampling
- * buffer, for example, by factor 2 (512K) or 4 (1M).
+ * Sampling buffer size		Buffer characteristics
+ * ---------------------------------------------------
+ *	 64KB		    ==	  16 pages (4KB per page)
+ *				   1 page  for SDB-tables
+ *				  15 pages for SDBs
+ *
+ *  32MB		    ==	8192 pages (4KB per page)
+ *				  16 pages for SDB-tables
+ *				8176 pages for SDBs
  */
-#define CPUM_SF_MAX_SDB	    6471
+static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15;
+static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176;
 
 struct sf_buffer {
-	unsigned long	 sdbt;	    /* Sample-data-block-table origin */
+	unsigned long	 *sdbt;	    /* Sample-data-block-table origin */
 	/* buffer characteristics (required for buffer increments) */
-	unsigned long num_sdb;	    /* Number of sample-data-blocks */
-	unsigned long	 tail;	    /* last sample-data-block-table */
+	unsigned long  num_sdb;	    /* Number of sample-data-blocks */
+	unsigned long num_sdbt;	    /* Number of sample-data-block-tables */
+	unsigned long	 *tail;	    /* last sample-data-block-table */
 };
 
 struct cpu_hw_sf {
 	/* CPU-measurement sampling information block */
 	struct hws_qsi_info_block qsi;
+	/* CPU-measurement sampling control block */
 	struct hws_lsctl_request_block lsctl;
 	struct sf_buffer sfb;	    /* Sampling buffer */
 	unsigned int flags;	    /* Status flags */
@@ -64,12 +86,23 @@ static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf);
 /* Debug feature */
 static debug_info_t *sfdbg;
 
+/*
+ * sf_disable() - Switch off sampling facility
+ */
+static int sf_disable(void)
+{
+	struct hws_lsctl_request_block sreq;
+
+	memset(&sreq, 0, sizeof(sreq));
+	return lsctl(&sreq);
+}
+
 /*
  * sf_buffer_available() - Check for an allocated sampling buffer
  */
 static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
 {
-	return (cpuhw->sfb.sdbt) ? 1 : 0;
+	return !!cpuhw->sfb.sdbt;
 }
 
 /*
@@ -77,32 +110,32 @@ static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
  */
 static void free_sampling_buffer(struct sf_buffer *sfb)
 {
-	unsigned long sdbt, *curr;
+	unsigned long *sdbt, *curr;
 
 	if (!sfb->sdbt)
 		return;
 
 	sdbt = sfb->sdbt;
-	curr = (unsigned long *) sdbt;
+	curr = sdbt;
 
-	/* we'll free the SDBT after all SDBs are processed... */
+	/* Free the SDBT after all SDBs are processed... */
 	while (1) {
 		if (!*curr || !sdbt)
 			break;
 
-		/* watch for link entry reset if found */
+		/* Process table-link entries */
 		if (is_link_entry(curr)) {
 			curr = get_next_sdbt(curr);
 			if (sdbt)
-				free_page(sdbt);
+				free_page((unsigned long) sdbt);
 
-			/* we are done if we reach the origin */
-			if ((unsigned long) curr == sfb->sdbt)
+			/* If the origin is reached, sampling buffer is freed */
+			if (curr == sfb->sdbt)
 				break;
 			else
-				sdbt = (unsigned long) curr;
+				sdbt = curr;
 		} else {
-			/* process SDB pointer */
+			/* Process SDB pointer */
 			if (*curr) {
 				free_page(*curr);
 				curr++;
@@ -111,10 +144,106 @@ static void free_sampling_buffer(struct sf_buffer *sfb)
 	}
 
 	debug_sprintf_event(sfdbg, 5,
-			    "free_sampling_buffer: freed sdbt=%0lx\n", sfb->sdbt);
+			    "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt);
 	memset(sfb, 0, sizeof(*sfb));
 }
 
+static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags)
+{
+	unsigned long sdb, *trailer;
+
+	/* Allocate and initialize sample-data-block */
+	sdb = get_zeroed_page(gfp_flags);
+	if (!sdb)
+		return -ENOMEM;
+	trailer = trailer_entry_ptr(sdb);
+	*trailer = SDB_TE_ALERT_REQ_MASK;
+
+	/* Link SDB into the sample-data-block-table */
+	*sdbt = sdb;
+
+	return 0;
+}
+
+/*
+ * realloc_sampling_buffer() - extend sampler memory
+ *
+ * Allocates new sample-data-blocks and adds them to the specified sampling
+ * buffer memory.
+ *
+ * Important: This modifies the sampling buffer and must be called when the
+ *	      sampling facility is disabled.
+ *
+ * Returns zero on success, non-zero otherwise.
+ */
+static int realloc_sampling_buffer(struct sf_buffer *sfb,
+				   unsigned long num_sdb, gfp_t gfp_flags)
+{
+	int i, rc;
+	unsigned long *new, *tail;
+
+	if (!sfb->sdbt || !sfb->tail)
+		return -EINVAL;
+
+	if (!is_link_entry(sfb->tail))
+		return -EINVAL;
+
+	/* Append to the existing sampling buffer, overwriting the table-link
+	 * register.
+	 * The tail variables always points to the "tail" (last and table-link)
+	 * entry in an SDB-table.
+	 */
+	tail = sfb->tail;
+
+	/* Do a sanity check whether the table-link entry points to
+	 * the sampling buffer origin.
+	 */
+	if (sfb->sdbt != get_next_sdbt(tail)) {
+		debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: "
+				    "sampling buffer is not linked: origin=%p"
+				    "tail=%p\n",
+				    (void *) sfb->sdbt, (void *) tail);
+		return -EINVAL;
+	}
+
+	/* Allocate remaining SDBs */
+	rc = 0;
+	for (i = 0; i < num_sdb; i++) {
+		/* Allocate a new SDB-table if it is full. */
+		if (require_table_link(tail)) {
+			new = (unsigned long *) get_zeroed_page(gfp_flags);
+			if (!new) {
+				rc = -ENOMEM;
+				break;
+			}
+			sfb->num_sdbt++;
+			/* Link current page to tail of chain */
+			*tail = (unsigned long)(void *) new + 1;
+			tail = new;
+		}
+
+		/* Allocate a new sample-data-block.
+		 * If there is not enough memory, stop the realloc process
+		 * and simply use what was allocated.  If this is a temporary
+		 * issue, a new realloc call (if required) might succeed.
+		 */
+		rc = alloc_sample_data_block(tail, gfp_flags);
+		if (rc)
+			break;
+		sfb->num_sdb++;
+		tail++;
+	}
+
+	/* Link sampling buffer to its origin */
+	*tail = (unsigned long) sfb->sdbt + 1;
+	sfb->tail = tail;
+
+	debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer"
+			    " settings: sdbt=%lu sdb=%lu\n",
+			    sfb->num_sdbt, sfb->num_sdb);
+	return rc;
+}
+
 /*
  * allocate_sampling_buffer() - allocate sampler memory
  *
@@ -128,75 +257,74 @@ static void free_sampling_buffer(struct sf_buffer *sfb)
  */
 static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
 {
-	int j, k, rc;
-	unsigned long *sdbt, *tail, *trailer;
-	unsigned long sdb;
-	unsigned long num_sdbt, sdb_per_table;
+	int rc;
 
 	if (sfb->sdbt)
 		return -EINVAL;
+
+	/* Allocate the sample-data-block-table origin */
+	sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
+	if (!sfb->sdbt)
+		return -ENOMEM;
 	sfb->num_sdb = 0;
+	sfb->num_sdbt = 1;
 
-	/* Compute the number of required sample-data-block-tables (SDBT) */
-	num_sdbt = num_sdb / ((PAGE_SIZE - 8) / 8);
-	if (num_sdbt < CPUM_SF_MIN_SDBT)
-		num_sdbt = CPUM_SF_MIN_SDBT;
-	sdb_per_table = (PAGE_SIZE - 8) / 8;
-
-	debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: num_sdbt=%lu "
-			    "num_sdb=%lu sdb_per_table=%lu\n",
-			    num_sdbt, num_sdb, sdb_per_table);
-	sdbt = NULL;
-	tail = sdbt;
-
-	for (j = 0; j < num_sdbt; j++) {
-		sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
-		if (!sdbt) {
-			rc = -ENOMEM;
-			goto allocate_sdbt_error;
-		}
+	/* Link the table origin to point to itself to prepare for
+	 * realloc_sampling_buffer() invocation.
+	 */
+	sfb->tail = sfb->sdbt;
+	*sfb->tail = (unsigned long)(void *) sfb->sdbt + 1;
 
-		/* save origin of sample-data-block-table */
-		if (!sfb->sdbt)
-			sfb->sdbt = (unsigned long) sdbt;
+	/* Allocate requested number of sample-data-blocks */
+	rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
+	if (rc) {
+		free_sampling_buffer(sfb);
+		debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: "
+			"realloc_sampling_buffer failed with rc=%i\n", rc);
+	} else
+		debug_sprintf_event(sfdbg, 4,
+			"alloc_sampling_buffer: tear=%p dear=%p\n",
+			sfb->sdbt, (void *) *sfb->sdbt);
+	return rc;
+}
 
-		/* link current page to tail of chain */
-		if (tail)
-			*tail = (unsigned long)(void *) sdbt + 1;
+static void sfb_set_limits(unsigned long min, unsigned long max)
+{
+	CPUM_SF_MIN_SDB = min;
+	CPUM_SF_MAX_SDB = max;
+}
 
-		for (k = 0; k < num_sdb && k < sdb_per_table; k++) {
-			/* get and set SDB page */
-			sdb = get_zeroed_page(GFP_KERNEL);
-			if (!sdb) {
-				rc = -ENOMEM;
-				goto allocate_sdbt_error;
-			}
-			*sdbt = sdb;
-			trailer = trailer_entry_ptr(*sdbt);
-			*trailer = SDB_TE_ALERT_REQ_MASK;
-			sdbt++;
-		}
-		num_sdb -= k;
-		sfb->num_sdb += k;	/* count allocated sdb's */
-		tail = sdbt;
-	}
+static unsigned long sfb_pending_allocs(struct sf_buffer *sfb,
+					struct hw_perf_event *hwc)
+{
+	if (!sfb->sdbt)
+		return SFB_ALLOC_REG(hwc);
+	if (SFB_ALLOC_REG(hwc) > sfb->num_sdb)
+		return SFB_ALLOC_REG(hwc) - sfb->num_sdb;
+	return 0;
+}
 
-	rc = 0;
-	if (tail)
-		*tail = sfb->sdbt + 1;
-	sfb->tail = (unsigned long) (void *)tail;
+static int sfb_has_pending_allocs(struct sf_buffer *sfb,
+				   struct hw_perf_event *hwc)
+{
+	return sfb_pending_allocs(sfb, hwc) > 0;
+}
 
-allocate_sdbt_error:
-	if (rc)
-		free_sampling_buffer(sfb);
-	else
-		debug_sprintf_event(sfdbg, 4,
-			"alloc_sampling_buffer: tear=%0lx dear=%0lx\n",
-			sfb->sdbt, *(unsigned long *) sfb->sdbt);
-	return rc;
+static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc)
+{
+	/* Limit the number SDBs to not exceed the maximum */
+	num = min_t(unsigned long, num, CPUM_SF_MAX_SDB - SFB_ALLOC_REG(hwc));
+	if (num)
+		SFB_ALLOC_REG(hwc) += num;
 }
 
-static int allocate_sdbt(struct cpu_hw_sf *cpuhw, const struct hw_perf_event *hwc)
+static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc)
+{
+	SFB_ALLOC_REG(hwc) = 0;
+	sfb_account_allocs(num, hwc);
+}
+
+static int allocate_sdbt(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
 {
 	unsigned long n_sdb, freq;
 	unsigned long factor;
@@ -225,39 +353,133 @@ static int allocate_sdbt(struct cpu_hw_sf *cpuhw, const struct hw_perf_event *hw
 	if (n_sdb < CPUM_SF_MIN_SDB)
 		n_sdb = CPUM_SF_MIN_SDB;
 
-	/* Return if there is already a sampling buffer allocated.
-	 * XXX Remove this later and check number of available and
-	 * required sdb's and, if necessary, increase the sampling buffer.
+	/* If there is already a sampling buffer allocated, it is very likely
+	 * that the sampling facility is enabled too.  If the event to be
+	 * initialized requires a greater sampling buffer, the allocation must
+	 * be postponed.  Changing the sampling buffer requires the sampling
+	 * facility to be in the disabled state.  So, account the number of
+	 * required SDBs and let cpumsf_pmu_enable() resize the buffer just
+	 * before the event is started.
 	 */
+	sfb_init_allocs(n_sdb, hwc);
 	if (sf_buffer_available(cpuhw))
 		return 0;
 
 	debug_sprintf_event(sfdbg, 3,
-			    "allocate_sdbt: rate=%lu f=%lu sdb=%lu/%i cpuhw=%p\n",
+			    "allocate_sdbt: rate=%lu f=%lu sdb=%lu/%lu cpuhw=%p\n",
 			    SAMPL_RATE(hwc), freq, n_sdb, CPUM_SF_MAX_SDB, cpuhw);
 
 	return alloc_sampling_buffer(&cpuhw->sfb,
-			       min_t(unsigned long, n_sdb, CPUM_SF_MAX_SDB));
+				     sfb_pending_allocs(&cpuhw->sfb, hwc));
 }
 
+static unsigned long min_percent(unsigned int percent, unsigned long base,
+				 unsigned long min)
+{
+	return min_t(unsigned long, min, DIV_ROUND_UP(percent * base, 100));
+}
 
-/* Number of perf events counting hardware events */
-static atomic_t num_events;
-/* Used to avoid races in calling reserve/release_cpumf_hardware */
-static DEFINE_MUTEX(pmc_reserve_mutex);
+static unsigned long compute_sfb_extent(unsigned long ratio, unsigned long base)
+{
+	/* Use a percentage-based approach to extend the sampling facility
+	 * buffer.  Accept up to 5% sample data loss.
+	 * Vary the extents between 1% to 5% of the current number of
+	 * sample-data-blocks.
+	 */
+	if (ratio <= 5)
+		return 0;
+	if (ratio <= 25)
+		return min_percent(1, base, 1);
+	if (ratio <= 50)
+		return min_percent(1, base, 1);
+	if (ratio <= 75)
+		return min_percent(2, base, 2);
+	if (ratio <= 100)
+		return min_percent(3, base, 3);
+	if (ratio <= 250)
+		return min_percent(4, base, 4);
+
+	return min_percent(5, base, 8);
+}
 
-/*
- * sf_disable() - Switch off sampling facility
+static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
+				  struct hw_perf_event *hwc)
+{
+	unsigned long ratio, num;
+
+	if (!OVERFLOW_REG(hwc))
+		return;
+
+	/* The sample_overflow contains the average number of sample data
+	 * that has been lost because sample-data-blocks were full.
+	 *
+	 * Calculate the total number of sample data entries that has been
+	 * discarded.  Then calculate the ratio of lost samples to total samples
+	 * per second in percent.
+	 */
+	ratio = DIV_ROUND_UP(100 * OVERFLOW_REG(hwc) * cpuhw->sfb.num_sdb,
+			     sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)));
+
+	/* Compute number of sample-data-blocks */
+	num = compute_sfb_extent(ratio, cpuhw->sfb.num_sdb);
+	if (num)
+		sfb_account_allocs(num, hwc);
+
+	debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu"
+			    " num=%lu\n", OVERFLOW_REG(hwc), ratio, num);
+	OVERFLOW_REG(hwc) = 0;
+}
+
+/* extend_sampling_buffer() - Extend sampling buffer
+ * @sfb:	Sampling buffer structure (for local CPU)
+ * @hwc:	Perf event hardware structure
+ *
+ * Use this function to extend the sampling buffer based on the overflow counter
+ * and postponed allocation extents stored in the specified Perf event hardware.
+ *
+ * Important: This function disables the sampling facility in order to safely
+ *	      change the sampling buffer structure.  Do not call this function
+ *	      when the PMU is active.
  */
-static int sf_disable(void)
+static void extend_sampling_buffer(struct sf_buffer *sfb,
+				   struct hw_perf_event *hwc)
 {
-	struct hws_lsctl_request_block sreq;
+	unsigned long num, num_old;
+	int rc;
 
-	memset(&sreq, 0, sizeof(sreq));
-	return lsctl(&sreq);
+	num = sfb_pending_allocs(sfb, hwc);
+	if (!num)
+		return;
+	num_old = sfb->num_sdb;
+
+	/* Disable the sampling facility to reset any states and also
+	 * clear pending measurement alerts.
+	 */
+	sf_disable();
+
+	/* Extend the sampling buffer.
+	 * This memory allocation typically happens in an atomic context when
+	 * called by perf.  Because this is a reallocation, it is fine if the
+	 * new SDB-request cannot be satisfied immediately.
+	 */
+	rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
+	if (rc)
+		debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc "
+				    "failed with rc=%i\n", rc);
+
+	if (sfb_has_pending_allocs(sfb, hwc))
+		debug_sprintf_event(sfdbg, 5, "sfb: extend: "
+				    "req=%lu alloc=%lu remaining=%lu\n",
+				    num, sfb->num_sdb - num_old,
+				    sfb_pending_allocs(sfb, hwc));
 }
 
 
+/* Number of perf events counting hardware events */
+static atomic_t num_events;
+/* Used to avoid races in calling reserve/release_cpumf_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
 #define PMC_INIT      0
 #define PMC_RELEASE   1
 #define PMC_FAILURE   2
@@ -345,19 +567,17 @@ static void hw_init_period(struct hw_perf_event *hwc, u64 period)
 }
 
 static void hw_reset_registers(struct hw_perf_event *hwc,
-			       unsigned long sdbt_origin)
+			       unsigned long *sdbt_origin)
 {
-	TEAR_REG(hwc) = sdbt_origin;	      /* (re)set to first sdb table */
+	/* (Re)set to first sample-data-block-table */
+	TEAR_REG(hwc) = (unsigned long) sdbt_origin;
 }
 
 static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
 				   unsigned long rate)
 {
-	if (rate < si->min_sampl_rate)
-		return si->min_sampl_rate;
-	if (rate > si->max_sampl_rate)
-		return si->max_sampl_rate;
-	return rate;
+	return clamp_t(unsigned long, rate,
+		       si->min_sampl_rate, si->max_sampl_rate);
 }
 
 static int __hw_perf_event_init(struct perf_event *event)
@@ -448,6 +668,10 @@ static int __hw_perf_event_init(struct perf_event *event)
 	SAMPL_RATE(hwc) = rate;
 	hw_init_period(hwc, SAMPL_RATE(hwc));
 
+	/* Initialize sample data overflow accounting */
+	hwc->extra_reg.reg = REG_OVERFLOW;
+	OVERFLOW_REG(hwc) = 0;
+
 	/* Allocate the per-CPU sampling buffer using the CPU information
 	 * from the event.  If the event is not pinned to a particular
 	 * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling
@@ -513,6 +737,7 @@ static int cpumsf_pmu_event_init(struct perf_event *event)
 static void cpumsf_pmu_enable(struct pmu *pmu)
 {
 	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+	struct hw_perf_event *hwc;
 	int err;
 
 	if (cpuhw->flags & PMU_F_ENABLED)
@@ -521,6 +746,26 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
 	if (cpuhw->flags & PMU_F_ERR_MASK)
 		return;
 
+	/* Check whether to extent the sampling buffer.
+	 *
+	 * Two conditions trigger an increase of the sampling buffer for a
+	 * perf event:
+	 *    1. Postponed buffer allocations from the event initialization.
+	 *    2. Sampling overflows that contribute to pending allocations.
+	 *
+	 * Note that the extend_sampling_buffer() function disables the sampling
+	 * facility, but it can be fully re-enabled using sampling controls that
+	 * have been saved in cpumsf_pmu_disable().
+	 */
+	if (cpuhw->event) {
+		hwc = &cpuhw->event->hw;
+		/* Account number of overflow-designated buffer extents */
+		sfb_account_overflows(cpuhw, hwc);
+		if (sfb_has_pending_allocs(&cpuhw->sfb, hwc))
+			extend_sampling_buffer(&cpuhw->sfb, hwc);
+	}
+
+	/* (Re)enable the PMU and sampling facility */
 	cpuhw->flags |= PMU_F_ENABLED;
 	barrier();
 
@@ -632,8 +877,6 @@ static int perf_push_sample(struct perf_event *event,
 	if (perf_event_overflow(event, &data, &regs)) {
 		overflow = 1;
 		event->pmu->stop(event, 0);
-		debug_sprintf_event(sfdbg, 4, "perf_push_sample: PMU stopped"
-				    " because of an event overflow\n");
 	}
 	perf_event_update_userpage(event);
 
@@ -710,11 +953,11 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 	struct hw_perf_event *hwc = &event->hw;
 	struct hws_trailer_entry *te;
 	unsigned long *sdbt;
-	unsigned long long event_overflow, sampl_overflow;
+	unsigned long long event_overflow, sampl_overflow, num_sdb;
 	int done;
 
 	sdbt = (unsigned long *) TEAR_REG(hwc);
-	done = event_overflow = sampl_overflow = 0;
+	done = event_overflow = sampl_overflow = num_sdb = 0;
 	while (!done) {
 		/* Get the trailer entry of the sample-data-block */
 		te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
@@ -726,17 +969,13 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 				break;
 		}
 
-		/* Check sample overflow count */
-		if (te->overflow) {
-			/* Increment sample overflow counter */
-			sampl_overflow += te->overflow;
-
-			/* XXX: If an sample overflow occurs, increase the
-			 *	sampling buffer.  Set a "realloc" flag because
-			 *	the sampler must be re-enabled for changing
-			 *	the sample-data-block-table content.
+		/* Check the sample overflow count */
+		if (te->overflow)
+			/* Account sample overflows and, if a particular limit
+			 * is reached, extend the sampling buffer.
+			 * For details, see sfb_account_overflows().
 			 */
-		}
+			sampl_overflow += te->overflow;
 
 		/* Timestamps are valid for full sample-data-blocks only */
 		debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p "
@@ -749,6 +988,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 		 * is stopped and remaining samples will be discarded.
 		 */
 		hw_collect_samples(event, sdbt, &event_overflow);
+		num_sdb++;
 
 		/* Reset trailer */
 		xchg(&te->overflow, 0);
@@ -775,6 +1015,10 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 			flush_all = 1;
 	}
 
+	/* Account sample overflows in the event hardware structure */
+	if (sampl_overflow)
+		OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) +
+						 sampl_overflow, 1 + num_sdb);
 	if (sampl_overflow || event_overflow)
 		debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: "
 				    "overflow stats: sample=%llu event=%llu\n",
@@ -849,7 +1093,7 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags)
 	 */
 	cpuhw->lsctl.s = 0;
 	cpuhw->lsctl.h = 1;
-	cpuhw->lsctl.tear = cpuhw->sfb.sdbt;
+	cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
 	cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
 	cpuhw->lsctl.interval = SAMPL_RATE(&event->hw);
 	hw_reset_registers(&event->hw, cpuhw->sfb.sdbt);
@@ -1018,6 +1262,48 @@ static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
+static int param_get_sfb_size(char *buffer, const struct kernel_param *kp)
+{
+	if (!cpum_sf_avail())
+		return -ENODEV;
+	return sprintf(buffer, "%lu,%lu", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+}
+
+static int param_set_sfb_size(const char *val, const struct kernel_param *kp)
+{
+	int rc;
+	unsigned long min, max;
+
+	if (!cpum_sf_avail())
+		return -ENODEV;
+	if (!val || !strlen(val))
+		return -EINVAL;
+
+	/* Valid parameter values: "min,max" or "max" */
+	min = CPUM_SF_MIN_SDB;
+	max = CPUM_SF_MAX_SDB;
+	if (strchr(val, ','))
+		rc = (sscanf(val, "%lu,%lu", &min, &max) == 2) ? 0 : -EINVAL;
+	else
+		rc = kstrtoul(val, 10, &max);
+
+	if (min < 2 || min >= max || max > get_num_physpages())
+		rc = -EINVAL;
+	if (rc)
+		return rc;
+
+	sfb_set_limits(min, max);
+	pr_info("Changed sampling buffer settings: min=%lu max=%lu\n",
+		CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+	return 0;
+}
+
+#define param_check_sfb_size(name, p) __param_check(name, p, void)
+static struct kernel_param_ops param_ops_sfb_size = {
+	.set = param_set_sfb_size,
+	.get = param_get_sfb_size,
+};
+
 static int __init init_cpum_sampling_pmu(void)
 {
 	int err;
@@ -1047,3 +1333,4 @@ out:
 	return err;
 }
 arch_initcall(init_cpum_sampling_pmu);
+core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640);
-- 
cgit v1.2.3


From fcc77f507333776eaa336ab4ff49c23422f53703 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Thu, 12 Dec 2013 17:26:51 +0100
Subject: s390/cpum_sf: Atomically reset trailer entry fields of
 sample-data-blocks

Ensure to reset the sample-data-block full indicator and the overflow counter
at the same time.  This must be done atomically because the sampling hardware
is still active while full sample-data-block is processed.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/cpu_mf.h  | 13 +++++++++----
 arch/s390/kernel/perf_cpum_sf.c | 12 ++++++++----
 2 files changed, 17 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index d707abc26157..b0b3059b8d64 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -115,10 +115,15 @@ struct hws_data_entry {
 } __packed;
 
 struct hws_trailer_entry {
-	unsigned int f:1;	    /* 0 - Block Full Indicator 	 */
-	unsigned int a:1;	    /* 1 - Alert request control	 */
-	unsigned int t:1;	    /* 2 - Timestamp format		 */
-	unsigned long long:61;	    /* 3 - 63: Reserved 		 */
+	union {
+		struct {
+			unsigned int f:1;	/* 0 - Block Full Indicator   */
+			unsigned int a:1;	/* 1 - Alert request control  */
+			unsigned int t:1;	/* 2 - Timestamp format	      */
+			unsigned long long:61;	/* 3 - 63: Reserved	      */
+		};
+		unsigned long long flags;	/* 0 - 63: All indicators     */
+	};
 	unsigned long long overflow;	 /* 64 - sample Overflow count	      */
 	unsigned long long timestamp;	 /* 16 - time-stamp		      */
 	unsigned long long timestamp1;	 /*				      */
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index ea1656073dac..9202f2858894 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -953,7 +953,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 	struct hw_perf_event *hwc = &event->hw;
 	struct hws_trailer_entry *te;
 	unsigned long *sdbt;
-	unsigned long long event_overflow, sampl_overflow, num_sdb;
+	unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags;
 	int done;
 
 	sdbt = (unsigned long *) TEAR_REG(hwc);
@@ -990,9 +990,13 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 		hw_collect_samples(event, sdbt, &event_overflow);
 		num_sdb++;
 
-		/* Reset trailer */
-		xchg(&te->overflow, 0);
-		xchg((unsigned char *) te, 0x40);
+		/* Reset trailer (using compare-double-and-swap) */
+		do {
+			te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK;
+			te_flags |= SDB_TE_ALERT_REQ_MASK;
+		} while (!cmpxchg_double(&te->flags, &te->overflow,
+					 te->flags, te->overflow,
+					 te_flags, 0ULL));
 
 		/* Advance to next sample-data-block */
 		sdbt++;
-- 
cgit v1.2.3


From 443d4beb823d4dccaaf964b59df9dd38b4d6aae7 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Thu, 12 Dec 2013 17:38:50 +0100
Subject: s390/cpum_sf: Add helper to read TOD from trailer entries

The trailer entry contains a timestamp of the time when the sample-data-block
became full.  The timestamp specifies a TOD (time-of-day) value in either the
STCK or STCKE format.

Provide a helper function to return the TOD value depending on the setting of
time format indicator.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/cpu_mf.h  | 14 ++++++++++++--
 arch/s390/kernel/perf_cpum_sf.c |  2 +-
 2 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index b0b3059b8d64..09dc5facc0bc 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -125,8 +125,7 @@ struct hws_trailer_entry {
 		unsigned long long flags;	/* 0 - 63: All indicators     */
 	};
 	unsigned long long overflow;	 /* 64 - sample Overflow count	      */
-	unsigned long long timestamp;	 /* 16 - time-stamp		      */
-	unsigned long long timestamp1;	 /*				      */
+	unsigned char timestamp[16];	 /* 16 - 31 timestamp		      */
 	unsigned long long reserved1;	 /* 32 -Reserved		      */
 	unsigned long long reserved2;	 /*				      */
 	unsigned long long progusage1;	 /* 48 - reserved for programming use */
@@ -232,6 +231,17 @@ static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi,
 #define SDB_TE_ALERT_REQ_MASK	0x4000000000000000UL
 #define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL
 
+/* Return TOD timestamp contained in an trailer entry */
+static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
+{
+	/* TOD in STCKE format */
+	if (te->t)
+		return *((unsigned long long *) &te->timestamp[1]);
+
+	/* TOD in STCK format */
+	return *((unsigned long long *) &te->timestamp[0]);
+}
+
 /* Return pointer to trailer entry of an sample data block */
 static inline unsigned long *trailer_entry_ptr(unsigned long v)
 {
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 9202f2858894..3ab7e67ee2e4 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -981,7 +981,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 		debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p "
 				    "overflow=%llu timestamp=0x%llx\n",
 				    sdbt, te->overflow,
-				    (te->f) ? te->timestamp : 0ULL);
+				    (te->f) ? trailer_timestamp(te) : 0ULL);
 
 		/* Collect all samples from a single sample-data-block and
 		 * flag if an (perf) event overflow happened.  If so, the PMU
-- 
cgit v1.2.3


From 443e802bab16916f9a51a34f2213f4dee6e8762c Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Thu, 12 Dec 2013 17:54:57 +0100
Subject: s390/cpum_sf: Detect KVM guest samples

The host-program-parameter (hpp) value of basic sample-data-entries designates
a SIE control block that is set by the LPP instruction in sie64a().
Non-zero values indicate guest samples, a value of zero indicates a host sample.

For perf samples, host and guest samples are distinguished using particular
PERF_MISC_* flags.  The perf layer calls perf_misc_flags() to set the flags
based on the pt_regs content.  For each sample-data-entry, the cpum_sf PMU
creates a pt_regs structure with the sample-data information.  An additional
flag structure is added to easily distinguish between host and guest samples.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/perf_event.h |  6 ++++++
 arch/s390/kernel/perf_cpum_sf.c    | 20 ++++++++++++++++++++
 arch/s390/kernel/perf_event.c      | 25 ++++++++++++++++++++++++-
 3 files changed, 50 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index 99d7f4e333c2..7667bde37dcb 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -42,6 +42,12 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 extern unsigned long perf_misc_flags(struct pt_regs *regs);
 #define perf_misc_flags(regs) perf_misc_flags(regs)
 
+/* Perf pt_regs extension for sample-data-entry indicators */
+struct perf_sf_sde_regs {
+	unsigned char in_guest:1;	  /* guest sample */
+	unsigned long reserved:63;	  /* reserved */
+};
+
 /* Perf PMU definitions for the counter facility */
 #define PERF_CPUM_CF_MAX_CTR		256
 
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 3ab7e67ee2e4..d611facae599 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -840,6 +840,7 @@ static int perf_push_sample(struct perf_event *event,
 {
 	int overflow;
 	struct pt_regs regs;
+	struct perf_sf_sde_regs *sde_regs;
 	struct perf_sample_data data;
 
 	/* Skip samples that are invalid or for which the instruction address
@@ -850,7 +851,16 @@ static int perf_push_sample(struct perf_event *event,
 
 	perf_sample_data_init(&data, 0, event->hw.last_period);
 
+	/* Setup pt_regs to look like an CPU-measurement external interrupt
+	 * using the Program Request Alert code.  The regs.int_parm_long
+	 * field which is unused contains additional sample-data-entry related
+	 * indicators.
+	 */
 	memset(&regs, 0, sizeof(regs));
+	regs.int_code = 0x1407;
+	regs.int_parm = CPU_MF_INT_SF_PRA;
+	sde_regs = (struct perf_sf_sde_regs *) &regs.int_parm_long;
+
 	regs.psw.addr = sample->ia;
 	if (sample->T)
 		regs.psw.mask |= PSW_MASK_DAT;
@@ -873,6 +883,16 @@ static int perf_push_sample(struct perf_event *event,
 		break;
 	}
 
+	/* The host-program-parameter (hpp) contains the sie control
+	 * block that is set by sie64a() in entry64.S.	Check if hpp
+	 * refers to a valid control block and set sde_regs flags
+	 * accordingly.  This would allow to use hpp values for other
+	 * purposes too.
+	 * For now, simply use a non-zero value as guest indicator.
+	 */
+	if (sample->hpp)
+		sde_regs->in_guest = 1;
+
 	overflow = 0;
 	if (perf_event_overflow(event, &data, &regs)) {
 		overflow = 1;
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 3bd2bf030ad4..60a68261d091 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -1,7 +1,7 @@
 /*
  * Performance event support for s390x
  *
- *  Copyright IBM Corp. 2012
+ *  Copyright IBM Corp. 2012, 2013
  *  Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -89,8 +89,31 @@ static unsigned long perf_misc_guest_flags(struct pt_regs *regs)
 					: PERF_RECORD_MISC_GUEST_KERNEL;
 }
 
+static unsigned long perf_misc_flags_sf(struct pt_regs *regs)
+{
+	struct perf_sf_sde_regs *sde_regs;
+	unsigned long flags;
+
+	sde_regs = (struct perf_sf_sde_regs *) &regs->int_parm_long;
+	if (sde_regs->in_guest)
+		flags = user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER
+					: PERF_RECORD_MISC_GUEST_KERNEL;
+	else
+		flags = user_mode(regs) ? PERF_RECORD_MISC_USER
+					: PERF_RECORD_MISC_KERNEL;
+	return flags;
+}
+
 unsigned long perf_misc_flags(struct pt_regs *regs)
 {
+	/* Check if the cpum_sf PMU has created the pt_regs structure.
+	 * In this case, perf misc flags can be easily extracted.  Otherwise,
+	 * do regular checks on the pt_regs content.
+	 */
+	if (regs->int_code == 0x1407 && regs->int_parm == CPU_MF_INT_SF_PRA)
+		if (!regs->gprs[15])
+			return perf_misc_flags_sf(regs);
+
 	if (is_in_guest(regs))
 		return perf_misc_guest_flags(regs);
 
-- 
cgit v1.2.3


From dd127b3b977b81eab58d1d7ee037195cf0bbeba7 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Thu, 12 Dec 2013 18:05:20 +0100
Subject: s390/cpum_sf: Filter perf events based event->attr.exclude_* settings

Introduce the perf_exclude_event() function to filter perf samples
according to event->attr.exclude_* settings.   During event initialization,
reset event exclude settings that are not supported.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/perf_cpum_sf.c | 36 +++++++++++++++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index d611facae599..28fa2f235158 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -723,10 +723,19 @@ static int cpumsf_pmu_event_init(struct perf_event *event)
 		return -ENOENT;
 	}
 
+	/* Check online status of the CPU to which the event is pinned */
 	if (event->cpu >= nr_cpumask_bits ||
 	    (event->cpu >= 0 && !cpu_online(event->cpu)))
 		return -ENODEV;
 
+	/* Force reset of idle/hv excludes regardless of what the
+	 * user requested.
+	 */
+	if (event->attr.exclude_hv)
+		event->attr.exclude_hv = 0;
+	if (event->attr.exclude_idle)
+		event->attr.exclude_idle = 0;
+
 	err = __hw_perf_event_init(event);
 	if (unlikely(err))
 		if (event->destroy)
@@ -824,6 +833,29 @@ static void cpumsf_pmu_disable(struct pmu *pmu)
 	cpuhw->flags &= ~PMU_F_ENABLED;
 }
 
+/* perf_exclude_event() - Filter event
+ * @event:	The perf event
+ * @regs:	pt_regs structure
+ * @sde_regs:	Sample-data-entry (sde) regs structure
+ *
+ * Filter perf events according to their exclude specification.
+ *
+ * Return non-zero if the event shall be excluded.
+ */
+static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs,
+			      struct perf_sf_sde_regs *sde_regs)
+{
+	if (event->attr.exclude_user && user_mode(regs))
+		return 1;
+	if (event->attr.exclude_kernel && !user_mode(regs))
+		return 1;
+	if (event->attr.exclude_guest && sde_regs->in_guest)
+		return 1;
+	if (event->attr.exclude_host && !sde_regs->in_guest)
+		return 1;
+	return 0;
+}
+
 /* perf_push_sample() - Push samples to perf
  * @event:	The perf event
  * @sample:	Hardware sample data
@@ -894,12 +926,14 @@ static int perf_push_sample(struct perf_event *event,
 		sde_regs->in_guest = 1;
 
 	overflow = 0;
+	if (perf_exclude_event(event, &regs, sde_regs))
+		goto out;
 	if (perf_event_overflow(event, &data, &regs)) {
 		overflow = 1;
 		event->pmu->stop(event, 0);
 	}
 	perf_event_update_userpage(event);
-
+out:
 	return overflow;
 }
 
-- 
cgit v1.2.3


From 7e75fc3ff4cffd90684816d69838f8730ac3e072 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Fri, 13 Dec 2013 11:42:44 +0100
Subject: s390/cpum_sf: Add raw data sampling to support the
 diagnostic-sampling function

Also support the diagnostic-sampling function in addition to the basic-sampling
function.  Diagnostic-sampling data entries contain hardware model specific
sampling data and additional programs are required to analyze the data.

To deliver diagnostic-sampling, as well, as basis-sampling data entries to user
space, introduce support for sampling "raw data".  If this particular perf
sampling type (PERF_SAMPLE_RAW) is used, sampling data entries are copied
to user space.  External programs can then analyze these data.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/cpu_mf.h     |  40 ++--
 arch/s390/include/asm/perf_event.h |  28 ++-
 arch/s390/kernel/perf_cpum_sf.c    | 380 ++++++++++++++++++++++++++++++-------
 arch/s390/kernel/perf_event.c      |  21 +-
 arch/s390/oprofile/hwsampler.c     |   4 +-
 5 files changed, 377 insertions(+), 96 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index 09dc5facc0bc..cb700d54bd83 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -59,13 +59,15 @@ struct cpumf_ctr_info {
 /* QUERY SAMPLING INFORMATION block */
 struct hws_qsi_info_block {	    /* Bit(s) */
 	unsigned int b0_13:14;	    /* 0-13: zeros			 */
-	unsigned int as:1;	    /* 14: sampling authorisation control*/
-	unsigned int b15_21:7;	    /* 15-21: zeros			 */
-	unsigned int es:1;	    /* 22: sampling enable control	 */
-	unsigned int b23_29:7;	    /* 23-29: zeros			 */
-	unsigned int cs:1;	    /* 30: sampling activation control	 */
-	unsigned int:1; 	    /* 31: reserved			 */
-	unsigned int bsdes:16;	    /* 4-5: size of basic sampling entry      */
+	unsigned int as:1;	    /* 14: basic-sampling authorization	 */
+	unsigned int ad:1;	    /* 15: diag-sampling authorization	 */
+	unsigned int b16_21:6;	    /* 16-21: zeros			 */
+	unsigned int es:1;	    /* 22: basic-sampling enable control */
+	unsigned int ed:1;	    /* 23: diag-sampling enable control	 */
+	unsigned int b24_29:6;	    /* 24-29: zeros			 */
+	unsigned int cs:1;	    /* 30: basic-sampling activation control */
+	unsigned int cd:1;	    /* 31: diag-sampling activation control */
+	unsigned int bsdes:16;	    /* 4-5: size of basic sampling entry */
 	unsigned int dsdes:16;	    /* 6-7: size of diagnostic sampling entry */
 	unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */
 	unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/
@@ -82,10 +84,11 @@ struct hws_lsctl_request_block {
 	unsigned int s:1;	    /* 0: maximum buffer indicator	 */
 	unsigned int h:1;	    /* 1: part. level reserved for VM use*/
 	unsigned long long b2_53:52;/* 2-53: zeros			 */
-	unsigned int es:1;	    /* 54: sampling enable control	 */
-	unsigned int b55_61:7;	    /* 55-61: - zeros			 */
-	unsigned int cs:1;	    /* 62: sampling activation control	 */
-	unsigned int b63:1;	    /* 63: zero 			 */
+	unsigned int es:1;	    /* 54: basic-sampling enable control */
+	unsigned int ed:1;	    /* 55: diag-sampling enable control	 */
+	unsigned int b56_61:6;	    /* 56-61: - zeros			 */
+	unsigned int cs:1;	    /* 62: basic-sampling activation control */
+	unsigned int cd:1;	    /* 63: diag-sampling activation control  */
 	unsigned long interval;     /* 8-15: sampling interval		 */
 	unsigned long tear;	    /* 16-23: TEAR contents		 */
 	unsigned long dear;	    /* 24-31: DEAR contents		 */
@@ -96,8 +99,7 @@ struct hws_lsctl_request_block {
 	unsigned long rsvrd4;	    /* reserved 			 */
 } __packed;
 
-
-struct hws_data_entry {
+struct hws_basic_entry {
 	unsigned int def:16;	    /* 0-15  Data Entry Format		 */
 	unsigned int R:4;	    /* 16-19 reserved			 */
 	unsigned int U:4;	    /* 20-23 Number of unique instruct.  */
@@ -114,6 +116,18 @@ struct hws_data_entry {
 	unsigned long long hpp;     /* Host Program Parameter		 */
 } __packed;
 
+struct hws_diag_entry {
+	unsigned int def:16;	    /* 0-15  Data Entry Format		 */
+	unsigned int R:14;	    /* 16-19 and 20-30 reserved		 */
+	unsigned int I:1;	    /* 31 entry valid or invalid	 */
+	u8	     data[];	    /* Machine-dependent sample data	 */
+} __packed;
+
+struct hws_combined_entry {
+	struct hws_basic_entry	basic;	/* Basic-sampling data entry */
+	struct hws_diag_entry	diag;	/* Diagnostic-sampling data entry */
+} __packed;
+
 struct hws_trailer_entry {
 	union {
 		struct {
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index 7667bde37dcb..bd4573f1d65c 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -52,15 +52,39 @@ struct perf_sf_sde_regs {
 #define PERF_CPUM_CF_MAX_CTR		256
 
 /* Perf PMU definitions for the sampling facility */
-#define PERF_CPUM_SF_MAX_CTR		1
-#define PERF_EVENT_CPUM_SF		0xB0000UL	/* Raw event ID */
+#define PERF_CPUM_SF_MAX_CTR		2
+#define PERF_EVENT_CPUM_SF		0xB0000UL /* Event: Basic-sampling */
+#define PERF_EVENT_CPUM_SF_DIAG		0xBD000UL /* Event: Combined-sampling */
+#define PERF_CPUM_SF_BASIC_MODE		0x0001	  /* Basic-sampling flag */
+#define PERF_CPUM_SF_DIAG_MODE		0x0002	  /* Diagnostic-sampling flag */
+#define PERF_CPUM_SF_MODE_MASK		(PERF_CPUM_SF_BASIC_MODE| \
+					 PERF_CPUM_SF_DIAG_MODE)
 
 #define REG_NONE		0
 #define REG_OVERFLOW		1
 #define OVERFLOW_REG(hwc)	((hwc)->extra_reg.config)
 #define SFB_ALLOC_REG(hwc)	((hwc)->extra_reg.alloc)
+#define RAWSAMPLE_REG(hwc)	((hwc)->config)
 #define TEAR_REG(hwc)		((hwc)->last_tag)
 #define SAMPL_RATE(hwc)		((hwc)->event_base)
+#define SAMPL_FLAGS(hwc)	((hwc)->config_base)
+#define SAMPL_DIAG_MODE(hwc)	(SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE)
+
+/* Structure for sampling data entries to be passed as perf raw sample data
+ * to user space.  Note that raw sample data must be aligned and, thus, might
+ * be padded with zeros.
+ */
+struct sf_raw_sample {
+#define SF_RAW_SAMPLE_BASIC	PERF_CPUM_SF_BASIC_MODE
+#define SF_RAW_SAMPLE_DIAG	PERF_CPUM_SF_DIAG_MODE
+	u64			format;
+	u32			 size;	  /* Size of sf_raw_sample */
+	u16			bsdes;	  /* Basic-sampling data entry size */
+	u16			dsdes;	  /* Diagnostic-sampling data entry size */
+	struct hws_basic_entry	basic;	  /* Basic-sampling data entry */
+	struct hws_diag_entry	 diag;	  /* Diagnostic-sampling data entry */
+	u8		    padding[];	  /* Padding to next multiple of 8 */
+} __packed;
 
 /* Perf hardware reserve and release functions */
 int perf_reserve_sampling(void);
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 28fa2f235158..b4ec058c4f10 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -17,6 +17,7 @@
 #include <linux/percpu.h>
 #include <linux/notifier.h>
 #include <linux/export.h>
+#include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/moduleparam.h>
 #include <asm/cpu_mf.h>
@@ -31,8 +32,8 @@
 #define CPUM_SF_MIN_SDBT	1
 
 /* Number of sample-data-blocks per sample-data-block-table (SDBT):
- * The table contains SDB origin (8 bytes) and one SDBT origin that
- * points to the next table.
+ * A table contains SDB pointers (8 bytes) and one table-link entry
+ * that points to the origin of the next SDBT.
  */
 #define CPUM_SF_SDB_PER_TABLE	((PAGE_SIZE - 8) / 8)
 
@@ -48,8 +49,11 @@ static inline int require_table_link(const void *sdbt)
 
 /* Minimum and maximum sampling buffer sizes:
  *
- * This number represents the maximum size of the sampling buffer
- * taking the number of sample-data-block-tables into account.
+ * This number represents the maximum size of the sampling buffer taking
+ * the number of sample-data-block-tables into account.  Note that these
+ * numbers apply to the basic-sampling function only.
+ * The maximum number of SDBs is increased by CPUM_SF_SDB_DIAG_FACTOR if
+ * the diagnostic-sampling function is active.
  *
  * Sampling buffer size		Buffer characteristics
  * ---------------------------------------------------
@@ -63,6 +67,7 @@ static inline int require_table_link(const void *sdbt)
  */
 static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15;
 static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176;
+static unsigned long __read_mostly CPUM_SF_SDB_DIAG_FACTOR = 1;
 
 struct sf_buffer {
 	unsigned long	 *sdbt;	    /* Sample-data-block-table origin */
@@ -290,8 +295,20 @@ static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
 
 static void sfb_set_limits(unsigned long min, unsigned long max)
 {
+	struct hws_qsi_info_block si;
+
 	CPUM_SF_MIN_SDB = min;
 	CPUM_SF_MAX_SDB = max;
+
+	memset(&si, 0, sizeof(si));
+	if (!qsi(&si))
+		CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes);
+}
+
+static unsigned long sfb_max_limit(struct hw_perf_event *hwc)
+{
+	return SAMPL_DIAG_MODE(hwc) ? CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR
+				    : CPUM_SF_MAX_SDB;
 }
 
 static unsigned long sfb_pending_allocs(struct sf_buffer *sfb,
@@ -312,8 +329,8 @@ static int sfb_has_pending_allocs(struct sf_buffer *sfb,
 
 static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc)
 {
-	/* Limit the number SDBs to not exceed the maximum */
-	num = min_t(unsigned long, num, CPUM_SF_MAX_SDB - SFB_ALLOC_REG(hwc));
+	/* Limit the number of SDBs to not exceed the maximum */
+	num = min_t(unsigned long, num, sfb_max_limit(hwc) - SFB_ALLOC_REG(hwc));
 	if (num)
 		SFB_ALLOC_REG(hwc) += num;
 }
@@ -324,32 +341,89 @@ static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc)
 	sfb_account_allocs(num, hwc);
 }
 
-static int allocate_sdbt(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
+static size_t event_sample_size(struct hw_perf_event *hwc)
+{
+	struct sf_raw_sample *sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc);
+	size_t sample_size;
+
+	/* The sample size depends on the sampling function: The basic-sampling
+	 * function must be always enabled, diagnostic-sampling function is
+	 * optional.
+	 */
+	sample_size = sfr->bsdes;
+	if (SAMPL_DIAG_MODE(hwc))
+		sample_size += sfr->dsdes;
+
+	return sample_size;
+}
+
+static void deallocate_buffers(struct cpu_hw_sf *cpuhw)
+{
+	if (cpuhw->sfb.sdbt)
+		free_sampling_buffer(&cpuhw->sfb);
+}
+
+static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
 {
-	unsigned long n_sdb, freq;
-	unsigned long factor;
+	unsigned long n_sdb, freq, factor;
+	size_t sfr_size, sample_size;
+	struct sf_raw_sample *sfr;
+
+	/* Allocate raw sample buffer
+	 *
+	 *    The raw sample buffer is used to temporarily store sampling data
+	 *    entries for perf raw sample processing.  The buffer size mainly
+	 *    depends on the size of diagnostic-sampling data entries which is
+	 *    machine-specific.  The exact size calculation includes:
+	 *	1. The first 4 bytes of diagnostic-sampling data entries are
+	 *	   already reflected in the sf_raw_sample structure.  Subtract
+	 *	   these bytes.
+	 *	2. The perf raw sample data must be 8-byte aligned (u64) and
+	 *	   perf's internal data size must be considered too.  So add
+	 *	   an additional u32 for correct alignment and subtract before
+	 *	   allocating the buffer.
+	 *	3. Store the raw sample buffer pointer in the perf event
+	 *	   hardware structure.
+	 */
+	sfr_size = ALIGN((sizeof(*sfr) - sizeof(sfr->diag) + cpuhw->qsi.dsdes) +
+			 sizeof(u32), sizeof(u64));
+	sfr_size -= sizeof(u32);
+	sfr = kzalloc(sfr_size, GFP_KERNEL);
+	if (!sfr)
+		return -ENOMEM;
+	sfr->size = sfr_size;
+	sfr->bsdes = cpuhw->qsi.bsdes;
+	sfr->dsdes = cpuhw->qsi.dsdes;
+	RAWSAMPLE_REG(hwc) = (unsigned long) sfr;
 
 	/* Calculate sampling buffers using 4K pages
 	 *
-	 *    1. Use frequency as input.  The samping buffer is designed for
-	 *	 a complete second.  This can be adjusted through the "factor"
-	 *	 variable.
+	 *    1. Determine the sample data size which depends on the used
+	 *	 sampling functions, for example, basic-sampling or
+	 *	 basic-sampling with diagnostic-sampling.
+	 *
+	 *    2. Use the sampling frequency as input.  The sampling buffer is
+	 *	 designed for almost one second.  This can be adjusted through
+	 *	 the "factor" variable.
 	 *	 In any case, alloc_sampling_buffer() sets the Alert Request
-	 *	 Control indicator to trigger measurement-alert to harvest
+	 *	 Control indicator to trigger a measurement-alert to harvest
 	 *	 sample-data-blocks (sdb).
 	 *
-	 *    2. Compute the number of sample-data-blocks and ensure a minimum
+	 *    3. Compute the number of sample-data-blocks and ensure a minimum
 	 *	 of CPUM_SF_MIN_SDB.  Also ensure the upper limit does not
-	 *	 exceed CPUM_SF_MAX_SDB.  See also the remarks for these
-	 *	 symbolic constants.
+	 *	 exceed a "calculated" maximum.  The symbolic maximum is
+	 *	 designed for basic-sampling only and needs to be increased if
+	 *	 diagnostic-sampling is active.
+	 *	 See also the remarks for these symbolic constants.
 	 *
-	 *    3. Compute number of pages used for the sample-data-block-table
-	 *	 and ensure a minimum of CPUM_SF_MIN_SDBT (at minimum one table
-	 *	 to manage up to 511 sample-data-blocks).
+	 *    4. Compute the number of sample-data-block-tables (SDBT) and
+	 *	 ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up
+	 *	 to 511 SDBs).
 	 */
+	sample_size = event_sample_size(hwc);
 	freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
 	factor = 1;
-	n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / cpuhw->qsi.bsdes));
+	n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size));
 	if (n_sdb < CPUM_SF_MIN_SDB)
 		n_sdb = CPUM_SF_MIN_SDB;
 
@@ -366,8 +440,10 @@ static int allocate_sdbt(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
 		return 0;
 
 	debug_sprintf_event(sfdbg, 3,
-			    "allocate_sdbt: rate=%lu f=%lu sdb=%lu/%lu cpuhw=%p\n",
-			    SAMPL_RATE(hwc), freq, n_sdb, CPUM_SF_MAX_SDB, cpuhw);
+			    "allocate_buffers: rate=%lu f=%lu sdb=%lu/%lu"
+			    " sample_size=%lu cpuhw=%p\n",
+			    SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc),
+			    sample_size, cpuhw);
 
 	return alloc_sampling_buffer(&cpuhw->sfb,
 				     sfb_pending_allocs(&cpuhw->sfb, hwc));
@@ -509,10 +585,8 @@ static void setup_pmc_cpu(void *flags)
 		if (err) {
 			pr_err("Switching off the sampling facility failed "
 			       "with rc=%i\n", err);
-		} else {
-			if (cpusf->sfb.sdbt)
-				free_sampling_buffer(&cpusf->sfb);
-		}
+		} else
+			deallocate_buffers(cpusf);
 		debug_sprintf_event(sfdbg, 5,
 				    "setup_pmc_cpu: released: cpuhw=%p\n", cpusf);
 		break;
@@ -550,6 +624,10 @@ static int reserve_pmc_hardware(void)
 
 static void hw_perf_event_destroy(struct perf_event *event)
 {
+	/* Free raw sample buffer */
+	if (RAWSAMPLE_REG(&event->hw))
+		kfree((void *) RAWSAMPLE_REG(&event->hw));
+
 	/* Release PMC if this is the last perf event */
 	if (!atomic_add_unless(&num_events, -1, 1)) {
 		mutex_lock(&pmc_reserve_mutex);
@@ -569,8 +647,15 @@ static void hw_init_period(struct hw_perf_event *hwc, u64 period)
 static void hw_reset_registers(struct hw_perf_event *hwc,
 			       unsigned long *sdbt_origin)
 {
+	struct sf_raw_sample *sfr;
+
 	/* (Re)set to first sample-data-block-table */
 	TEAR_REG(hwc) = (unsigned long) sdbt_origin;
+
+	/* (Re)set raw sampling buffer register */
+	sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc);
+	memset(&sfr->basic, 0, sizeof(sfr->basic));
+	memset(&sfr->diag, 0, sfr->dsdes);
 }
 
 static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
@@ -634,6 +719,20 @@ static int __hw_perf_event_init(struct perf_event *event)
 		goto out;
 	}
 
+	/* Always enable basic sampling */
+	SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE;
+
+	/* Check if diagnostic sampling is requested.  Deny if the required
+	 * sampling authorization is missing.
+	 */
+	if (attr->config == PERF_EVENT_CPUM_SF_DIAG) {
+		if (!si.ad) {
+			err = -EPERM;
+			goto out;
+		}
+		SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE;
+	}
+
 	/* The sampling information (si) contains information about the
 	 * min/max sampling intervals and the CPU speed.  So calculate the
 	 * correct sampling interval and avoid the whole period adjust
@@ -679,14 +778,14 @@ static int __hw_perf_event_init(struct perf_event *event)
 	 */
 	if (cpuhw)
 		/* Event is pinned to a particular CPU */
-		err = allocate_sdbt(cpuhw, hwc);
+		err = allocate_buffers(cpuhw, hwc);
 	else {
 		/* Event is not pinned, allocate sampling buffer on
 		 * each online CPU
 		 */
 		for_each_online_cpu(cpu) {
 			cpuhw = &per_cpu(cpu_hw_sf, cpu);
-			err = allocate_sdbt(cpuhw, hwc);
+			err = allocate_buffers(cpuhw, hwc);
 			if (err)
 				break;
 		}
@@ -705,7 +804,8 @@ static int cpumsf_pmu_event_init(struct perf_event *event)
 
 	switch (event->attr.type) {
 	case PERF_TYPE_RAW:
-		if (event->attr.config != PERF_EVENT_CPUM_SF)
+		if ((event->attr.config != PERF_EVENT_CPUM_SF) &&
+		    (event->attr.config != PERF_EVENT_CPUM_SF_DIAG))
 			return -ENOENT;
 		break;
 	case PERF_TYPE_HARDWARE:
@@ -786,8 +886,9 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
 		return;
 	}
 
-	debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i tear=%p dear=%p\n",
-			    cpuhw->lsctl.es, cpuhw->lsctl.cs,
+	debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i "
+			    "tear=%p dear=%p\n", cpuhw->lsctl.es, cpuhw->lsctl.cs,
+			    cpuhw->lsctl.ed, cpuhw->lsctl.cd,
 			    (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear);
 }
 
@@ -807,6 +908,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu)
 	/* Switch off sampling activation control */
 	inactive = cpuhw->lsctl;
 	inactive.cs = 0;
+	inactive.cd = 0;
 
 	err = lsctl(&inactive);
 	if (err) {
@@ -867,21 +969,19 @@ static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs,
  *
  * Return non-zero if an event overflow occurred.
  */
-static int perf_push_sample(struct perf_event *event,
-			    struct hws_data_entry *sample)
+static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
 {
 	int overflow;
 	struct pt_regs regs;
 	struct perf_sf_sde_regs *sde_regs;
 	struct perf_sample_data data;
+	struct perf_raw_record raw;
 
-	/* Skip samples that are invalid or for which the instruction address
-	 * is not predictable.	For the latter, the wait-state bit is set.
-	 */
-	if (sample->I || sample->W)
-		return 0;
-
+	/* Setup perf sample */
 	perf_sample_data_init(&data, 0, event->hw.last_period);
+	raw.size = sfr->size;
+	raw.data = sfr;
+	data.raw = &raw;
 
 	/* Setup pt_regs to look like an CPU-measurement external interrupt
 	 * using the Program Request Alert code.  The regs.int_parm_long
@@ -893,14 +993,14 @@ static int perf_push_sample(struct perf_event *event,
 	regs.int_parm = CPU_MF_INT_SF_PRA;
 	sde_regs = (struct perf_sf_sde_regs *) &regs.int_parm_long;
 
-	regs.psw.addr = sample->ia;
-	if (sample->T)
+	regs.psw.addr = sfr->basic.ia;
+	if (sfr->basic.T)
 		regs.psw.mask |= PSW_MASK_DAT;
-	if (sample->W)
+	if (sfr->basic.W)
 		regs.psw.mask |= PSW_MASK_WAIT;
-	if (sample->P)
+	if (sfr->basic.P)
 		regs.psw.mask |= PSW_MASK_PSTATE;
-	switch (sample->AS) {
+	switch (sfr->basic.AS) {
 	case 0x0:
 		regs.psw.mask |= PSW_ASC_PRIMARY;
 		break;
@@ -922,7 +1022,7 @@ static int perf_push_sample(struct perf_event *event,
 	 * purposes too.
 	 * For now, simply use a non-zero value as guest indicator.
 	 */
-	if (sample->hpp)
+	if (sfr->basic.hpp)
 		sde_regs->in_guest = 1;
 
 	overflow = 0;
@@ -942,51 +1042,155 @@ static void perf_event_count_update(struct perf_event *event, u64 count)
 	local64_add(count, &event->count);
 }
 
+static int sample_format_is_valid(struct hws_combined_entry *sample,
+				   unsigned int flags)
+{
+	if (likely(flags & PERF_CPUM_SF_BASIC_MODE))
+		/* Only basic-sampling data entries with data-entry-format
+		 * version of 0x0001 can be processed.
+		 */
+		if (sample->basic.def != 0x0001)
+			return 0;
+	if (flags & PERF_CPUM_SF_DIAG_MODE)
+		/* The data-entry-format number of diagnostic-sampling data
+		 * entries can vary.  Because diagnostic data is just passed
+		 * through, do only a sanity check on the DEF.
+		 */
+		if (sample->diag.def < 0x8001)
+			return 0;
+	return 1;
+}
+
+static int sample_is_consistent(struct hws_combined_entry *sample,
+				unsigned long flags)
+{
+	/* This check applies only to basic-sampling data entries of potentially
+	 * combined-sampling data entries.  Invalid entries cannot be processed
+	 * by the PMU and, thus, do not deliver an associated
+	 * diagnostic-sampling data entry.
+	 */
+	if (unlikely(!(flags & PERF_CPUM_SF_BASIC_MODE)))
+		return 0;
+	/*
+	 * Samples are skipped, if they are invalid or for which the
+	 * instruction address is not predictable, i.e., the wait-state bit is
+	 * set.
+	 */
+	if (sample->basic.I || sample->basic.W)
+		return 0;
+	return 1;
+}
+
+static void reset_sample_slot(struct hws_combined_entry *sample,
+			      unsigned long flags)
+{
+	if (likely(flags & PERF_CPUM_SF_BASIC_MODE))
+		sample->basic.def = 0;
+	if (flags & PERF_CPUM_SF_DIAG_MODE)
+		sample->diag.def = 0;
+}
+
+static void sfr_store_sample(struct sf_raw_sample *sfr,
+			     struct hws_combined_entry *sample)
+{
+	if (likely(sfr->format & PERF_CPUM_SF_BASIC_MODE))
+		sfr->basic = sample->basic;
+	if (sfr->format & PERF_CPUM_SF_DIAG_MODE)
+		memcpy(&sfr->diag, &sample->diag, sfr->dsdes);
+}
+
+static void debug_sample_entry(struct hws_combined_entry *sample,
+			       struct hws_trailer_entry *te,
+			       unsigned long flags)
+{
+	debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown "
+			    "sampling data entry: te->f=%i basic.def=%04x (%p)"
+			    " diag.def=%04x (%p)\n", te->f,
+			    sample->basic.def, &sample->basic,
+			    (flags & PERF_CPUM_SF_DIAG_MODE)
+					? sample->diag.def : 0xFFFF,
+			    (flags & PERF_CPUM_SF_DIAG_MODE)
+					?  &sample->diag : NULL);
+}
+
 /* hw_collect_samples() - Walk through a sample-data-block and collect samples
  * @event:	The perf event
  * @sdbt:	Sample-data-block table
  * @overflow:	Event overflow counter
  *
- * Walks through a sample-data-block and collects hardware sample-data that is
- * pushed to the perf event subsystem.	The overflow reports the number of
- * samples that has been discarded due to an event overflow.
+ * Walks through a sample-data-block and collects sampling data entries that are
+ * then pushed to the perf event subsystem.  Depending on the sampling function,
+ * there can be either basic-sampling or combined-sampling data entries.  A
+ * combined-sampling data entry consists of a basic- and a diagnostic-sampling
+ * data entry.	The sampling function is determined by the flags in the perf
+ * event hardware structure.  The function always works with a combined-sampling
+ * data entry but ignores the the diagnostic portion if it is not available.
+ *
+ * Note that the implementation focuses on basic-sampling data entries and, if
+ * such an entry is not valid, the entire combined-sampling data entry is
+ * ignored.
+ *
+ * The overflow variables counts the number of samples that has been discarded
+ * due to a perf event overflow.
  */
 static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
 			       unsigned long long *overflow)
 {
-	struct hws_data_entry *sample;
-	unsigned long *trailer;
+	unsigned long flags = SAMPL_FLAGS(&event->hw);
+	struct hws_combined_entry *sample;
+	struct hws_trailer_entry *te;
+	struct sf_raw_sample *sfr;
+	size_t sample_size;
+
+	/* Prepare and initialize raw sample data */
+	sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(&event->hw);
+	sfr->format = flags & PERF_CPUM_SF_MODE_MASK;
 
-	trailer = trailer_entry_ptr(*sdbt);
-	sample = (struct hws_data_entry *) *sdbt;
-	while ((unsigned long *) sample < trailer) {
+	sample_size = event_sample_size(&event->hw);
+	te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
+	sample = (struct hws_combined_entry *) *sdbt;
+	while ((unsigned long *) sample < (unsigned long *) te) {
 		/* Check for an empty sample */
-		if (!sample->def)
+		if (!sample->basic.def)
 			break;
 
 		/* Update perf event period */
 		perf_event_count_update(event, SAMPL_RATE(&event->hw));
 
-		/* Check for basic sampling mode */
-		if (sample->def == 0x0001) {
+		/* Check sampling data entry */
+		if (sample_format_is_valid(sample, flags)) {
 			/* If an event overflow occurred, the PMU is stopped to
 			 * throttle event delivery.  Remaining sample data is
 			 * discarded.
 			 */
-			if (!*overflow)
-				*overflow = perf_push_sample(event, sample);
-			else
+			if (!*overflow) {
+				if (sample_is_consistent(sample, flags)) {
+					/* Deliver sample data to perf */
+					sfr_store_sample(sfr, sample);
+					*overflow = perf_push_sample(event, sfr);
+				}
+			} else
 				/* Count discarded samples */
 				*overflow += 1;
-		} else
-			/* Sample slot is not yet written or other record */
-			debug_sprintf_event(sfdbg, 5, "hw_collect_samples: "
-					    "Unknown sample data entry format:"
-					    " %i\n", sample->def);
+		} else {
+			debug_sample_entry(sample, te, flags);
+			/* Sample slot is not yet written or other record.
+			 *
+			 * This condition can occur if the buffer was reused
+			 * from a combined basic- and diagnostic-sampling.
+			 * If only basic-sampling is then active, entries are
+			 * written into the larger diagnostic entries.
+			 * This is typically the case for sample-data-blocks
+			 * that are not full.  Stop processing if the first
+			 * invalid format was detected.
+			 */
+			if (!te->f)
+				break;
+		}
 
 		/* Reset sample slot and advance to next sample */
-		sample->def = 0;
-		sample++;
+		reset_sample_slot(sample, flags);
+		sample += sample_size;
 	}
 }
 
@@ -1104,6 +1308,8 @@ static void cpumsf_pmu_start(struct perf_event *event, int flags)
 	perf_pmu_disable(event->pmu);
 	event->hw.state = 0;
 	cpuhw->lsctl.cs = 1;
+	if (SAMPL_DIAG_MODE(&event->hw))
+		cpuhw->lsctl.cd = 1;
 	perf_pmu_enable(event->pmu);
 }
 
@@ -1119,6 +1325,7 @@ static void cpumsf_pmu_stop(struct perf_event *event, int flags)
 
 	perf_pmu_disable(event->pmu);
 	cpuhw->lsctl.cs = 0;
+	cpuhw->lsctl.cd = 0;
 	event->hw.state |= PERF_HES_STOPPED;
 
 	if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
@@ -1158,11 +1365,13 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags)
 
 	/* Ensure sampling functions are in the disabled state.  If disabled,
 	 * switch on sampling enable control. */
-	if (WARN_ON_ONCE(cpuhw->lsctl.es == 1)) {
+	if (WARN_ON_ONCE(cpuhw->lsctl.es == 1 || cpuhw->lsctl.ed == 1)) {
 		err = -EAGAIN;
 		goto out;
 	}
 	cpuhw->lsctl.es = 1;
+	if (SAMPL_DIAG_MODE(&event->hw))
+		cpuhw->lsctl.ed = 1;
 
 	/* Set in_use flag and store event */
 	event->hw.idx = 0;	  /* only one sampling event per CPU supported */
@@ -1185,6 +1394,7 @@ static void cpumsf_pmu_del(struct perf_event *event, int flags)
 	cpumsf_pmu_stop(event, PERF_EF_UPDATE);
 
 	cpuhw->lsctl.es = 0;
+	cpuhw->lsctl.ed = 0;
 	cpuhw->flags &= ~PMU_F_IN_USE;
 	cpuhw->event = NULL;
 
@@ -1198,9 +1408,11 @@ static int cpumsf_pmu_event_idx(struct perf_event *event)
 }
 
 CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF);
+CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG);
 
 static struct attribute *cpumsf_pmu_events_attr[] = {
 	CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC),
+	CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG),
 	NULL,
 };
 
@@ -1351,8 +1563,9 @@ static int param_set_sfb_size(const char *val, const struct kernel_param *kp)
 		return rc;
 
 	sfb_set_limits(min, max);
-	pr_info("Changed sampling buffer settings: min=%lu max=%lu\n",
-		CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+	pr_info("The sampling buffer limits have changed to: "
+		"min=%lu max=%lu (diag=x%lu)\n",
+		CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR);
 	return 0;
 }
 
@@ -1362,13 +1575,38 @@ static struct kernel_param_ops param_ops_sfb_size = {
 	.get = param_get_sfb_size,
 };
 
+#define RS_INIT_FAILURE_QSI	  0x0001
+#define RS_INIT_FAILURE_BSDES	  0x0002
+#define RS_INIT_FAILURE_ALRT	  0x0003
+#define RS_INIT_FAILURE_PERF	  0x0004
+static void __init pr_cpumsf_err(unsigned int reason)
+{
+	pr_err("Sampling facility support for perf is not available: "
+	       "reason=%04x\n", reason);
+}
+
 static int __init init_cpum_sampling_pmu(void)
 {
+	struct hws_qsi_info_block si;
 	int err;
 
 	if (!cpum_sf_avail())
 		return -ENODEV;
 
+	memset(&si, 0, sizeof(si));
+	if (qsi(&si)) {
+		pr_cpumsf_err(RS_INIT_FAILURE_QSI);
+		return -ENODEV;
+	}
+
+	if (si.bsdes != sizeof(struct hws_basic_entry)) {
+		pr_cpumsf_err(RS_INIT_FAILURE_BSDES);
+		return -EINVAL;
+	}
+
+	if (si.ad)
+		sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+
 	sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80);
 	if (!sfdbg)
 		pr_err("Registering for s390dbf failed\n");
@@ -1376,13 +1614,13 @@ static int __init init_cpum_sampling_pmu(void)
 
 	err = register_external_interrupt(0x1407, cpumf_measurement_alert);
 	if (err) {
-		pr_err("Failed to register for CPU-measurement alerts\n");
+		pr_cpumsf_err(RS_INIT_FAILURE_ALRT);
 		goto out;
 	}
 
 	err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW);
 	if (err) {
-		pr_err("Failed to register cpum_sf pmu\n");
+		pr_cpumsf_err(RS_INIT_FAILURE_PERF);
 		unregister_external_interrupt(0x1407, cpumf_measurement_alert);
 		goto out;
 	}
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 60a68261d091..91aa215f947f 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -139,16 +139,21 @@ static void print_debug_sf(void)
 	int cpu = smp_processor_id();
 
 	memset(&si, 0, sizeof(si));
-	if (qsi(&si)) {
-		pr_err("CPU[%i]: CPM_SF: qsi failed\n");
+	if (qsi(&si))
 		return;
-	}
 
-	pr_info("CPU[%i]: CPM_SF: as=%i es=%i cs=%i bsdes=%i dsdes=%i"
-		" min=%i max=%i cpu_speed=%i tear=%p dear=%p\n",
-		cpu, si.as, si.es, si.cs, si.bsdes, si.dsdes,
-		si.min_sampl_rate, si.max_sampl_rate, si.cpu_speed,
-		si.tear, si.dear);
+	pr_info("CPU[%i] CPUM_SF: basic=%i diag=%i min=%i max=%i cpu_speed=%i\n",
+		cpu, si.as, si.ad, si.min_sampl_rate, si.max_sampl_rate,
+		si.cpu_speed);
+
+	if (si.as)
+		pr_info("CPU[%i] CPUM_SF: Basic-sampling: a=%i e=%i c=%i"
+			" bsdes=%i tear=%p dear=%p\n", cpu,
+			si.as, si.es, si.cs, si.bsdes, si.tear, si.dear);
+	if (si.ad)
+		pr_info("CPU[%i] CPUM_SF: Diagnostic-sampling: a=%i e=%i c=%i"
+			" dsdes=%i tear=%p dear=%p\n", cpu,
+			si.ad, si.ed, si.cd, si.dsdes, si.tear, si.dear);
 }
 
 void perf_event_print_debug(void)
diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c
index eb095874540d..a32c96761eab 100644
--- a/arch/s390/oprofile/hwsampler.c
+++ b/arch/s390/oprofile/hwsampler.c
@@ -799,7 +799,7 @@ static void worker_on_interrupt(unsigned int cpu)
 static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
 		unsigned long *dear)
 {
-	struct hws_data_entry *sample_data_ptr;
+	struct hws_basic_entry *sample_data_ptr;
 	unsigned long *trailer;
 
 	trailer = trailer_entry_ptr(*sdbt);
@@ -809,7 +809,7 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
 		trailer = dear;
 	}
 
-	sample_data_ptr = (struct hws_data_entry *)(*sdbt);
+	sample_data_ptr = (struct hws_basic_entry *)(*sdbt);
 
 	while ((unsigned long *)sample_data_ptr < trailer) {
 		struct pt_regs *regs = NULL;
-- 
cgit v1.2.3


From d7528862cf035994972c2c6f42c927db78f2f3a2 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Fri, 13 Dec 2013 12:45:01 +0100
Subject: s390/cpum_sf: Add flag to process full SDBs only

Add the PERF_CPUM_SF_FULL_BLOCKS flag to process only sample-data-blocks that
have the block-full-indicator bit set.  Sample-data-blocks that are partially
filled are discarded.  Use this flag if the sampling buffer is likely to be
shared among perf events that use different sampling modes.  In such
environments, flushing sample-data-blocks that are not completely filled, might
cause invalid-data-formats.

Setting PERF_CPUM_SF_FULL_BLOCKS prevents potentially invalid sampling data to
be processed but, in contrast, also discards valid samples in partially filled
sample-data-blocks.  Note that sample-data-blocks might not become full for
small sampling frequencies or for workload that is scheduled for tiny intervals.

To sample with the PERF_CPUM_SF_FULL_BLOCKS flag, set the perf->attr.config1
to 0x0004.  For example:

	perf record -e cpum_sf/config=0xB000,config1=0x0004/

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/perf_event.h |  2 ++
 arch/s390/kernel/perf_cpum_sf.c    | 13 +++++++++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index bd4573f1d65c..159a8ec6da9a 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -59,6 +59,7 @@ struct perf_sf_sde_regs {
 #define PERF_CPUM_SF_DIAG_MODE		0x0002	  /* Diagnostic-sampling flag */
 #define PERF_CPUM_SF_MODE_MASK		(PERF_CPUM_SF_BASIC_MODE| \
 					 PERF_CPUM_SF_DIAG_MODE)
+#define PERF_CPUM_SF_FULL_BLOCKS	0x0004	  /* Process full SDBs only */
 
 #define REG_NONE		0
 #define REG_OVERFLOW		1
@@ -69,6 +70,7 @@ struct perf_sf_sde_regs {
 #define SAMPL_RATE(hwc)		((hwc)->event_base)
 #define SAMPL_FLAGS(hwc)	((hwc)->config_base)
 #define SAMPL_DIAG_MODE(hwc)	(SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE)
+#define SDB_FULL_BLOCKS(hwc)	(SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FULL_BLOCKS)
 
 /* Structure for sampling data entries to be passed as perf raw sample data
  * to user space.  Note that raw sample data must be aligned and, thus, might
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index b4ec058c4f10..3c3bc8d7b220 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -733,6 +733,10 @@ static int __hw_perf_event_init(struct perf_event *event)
 		SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE;
 	}
 
+	/* Check and set other sampling flags */
+	if (attr->config1 & PERF_CPUM_SF_FULL_BLOCKS)
+		SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FULL_BLOCKS;
+
 	/* The sampling information (si) contains information about the
 	 * min/max sampling intervals and the CPU speed.  So calculate the
 	 * correct sampling interval and avoid the whole period adjust
@@ -1203,8 +1207,10 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
  * register of the specified perf event.
  *
  * Only full sample-data-blocks are processed.	Specify the flash_all flag
- * to also walk through partially filled sample-data-blocks.
- *
+ * to also walk through partially filled sample-data-blocks.  It is ignored
+ * if PERF_CPUM_SF_FULL_BLOCKS is set.	The PERF_CPUM_SF_FULL_BLOCKS flag
+ * enforces the processing of full sample-data-blocks only (trailer entries
+ * with the block-full-indicator bit set).
  */
 static void hw_perf_event_update(struct perf_event *event, int flush_all)
 {
@@ -1214,6 +1220,9 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 	unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags;
 	int done;
 
+	if (flush_all && SDB_FULL_BLOCKS(hwc))
+		flush_all = 0;
+
 	sdbt = (unsigned long *) TEAR_REG(hwc);
 	done = event_overflow = sampl_overflow = num_sdb = 0;
 	while (!done) {
-- 
cgit v1.2.3


From 61aa4884b70cdf3b2d373e18ebbbada43789eade Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 1 Nov 2013 10:08:20 +0100
Subject: s390: use IS_ENABLED to check if a CONFIG is set to y or m

This is shorter and should be used instead of the longer form
which checks for both possible config options.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/entry64.S    | 6 +++---
 arch/s390/kernel/perf_event.c | 2 +-
 arch/s390/kernel/s390_ksyms.c | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index e5b43c97a834..9532fe23be47 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -74,7 +74,7 @@ _TIF_TRACE    = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
 	.endm
 
 	.macro LPP newpp
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
+#if IS_ENABLED(CONFIG_KVM)
 	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_LPP
 	jz	.+8
 	.insn	s,0xb2800000,\newpp
@@ -82,7 +82,7 @@ _TIF_TRACE    = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
 	.endm
 
 	.macro	HANDLE_SIE_INTERCEPT scratch,reason
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
+#if IS_ENABLED(CONFIG_KVM)
 	tmhh	%r8,0x0001		# interrupting from user ?
 	jnz	.+62
 	lgr	\scratch,%r9
@@ -946,7 +946,7 @@ cleanup_idle_insn:
 	.quad	__critical_end - __critical_start
 
 
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
+#if IS_ENABLED(CONFIG_KVM)
 /*
  * sie64a calling convention:
  * %r2 pointer to sie control block
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 91aa215f947f..a76d602f5928 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -60,7 +60,7 @@ static bool is_in_guest(struct pt_regs *regs)
 {
 	if (user_mode(regs))
 		return false;
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
+#if IS_ENABLED(CONFIG_KVM)
 	return instruction_pointer(regs) == (unsigned long) &sie_exit;
 #else
 	return false;
diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c
index 3bac589844a7..9f60467938d1 100644
--- a/arch/s390/kernel/s390_ksyms.c
+++ b/arch/s390/kernel/s390_ksyms.c
@@ -5,7 +5,7 @@
 #ifdef CONFIG_FUNCTION_TRACER
 EXPORT_SYMBOL(_mcount);
 #endif
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
+#if IS_ENABLED(CONFIG_KVM)
 EXPORT_SYMBOL(sie64a);
 EXPORT_SYMBOL(sie_exit);
 #endif
-- 
cgit v1.2.3


From 7fd565e27547c913b83b46d94662103be81a88ec Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Mon, 16 Dec 2013 21:20:23 +0100
Subject: perf/x86: enable Haswell Celeron RAPL support

Enable RAPL support for Haswell Celeron (model 69).

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: ak@linux.intel.com
Cc: acme@redhat.com
Cc: jolsa@redhat.com
Cc: zheng.z.yan@intel.com
Cc: bp@alien8.de
Cc: vincent.weaver@maine.edu
Cc: maria.n.dimakopoulou@gmail.com
Cc: peterz@infradead.org
Link: http://lkml.kernel.org/r/1387225224-27799-2-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_rapl.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index bf8e4a736d48..0e3754e450d9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -615,6 +615,7 @@ static int __init rapl_pmu_init(void)
 	case 42: /* Sandy Bridge */
 	case 58: /* Ivy Bridge */
 	case 60: /* Haswell */
+	case 69: /* Haswell-Celeron */
 		rapl_cntr_mask = RAPL_IDX_CLN;
 		rapl_pmu_events_group.attrs = rapl_events_cln_attr;
 		break;
-- 
cgit v1.2.3


From 9450d14fb959336803e5209119eb422b667b96aa Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 27 Nov 2013 13:54:39 +0000
Subject: Revert "ARM: 7556/1: perf: fix updated event period in response to
 PERF_EVENT_IOC_PERIOD"

This reverts commit 3581fe0ef37ce12ac7a4f74831168352ae848edc.

Fixes to the handling of PERF_EVENT_IOC_PERIOD in the core code mean
we no longer have to play this horrible game.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1385560479-11014-2-git-send-email-will.deacon@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/arm/kernel/perf_event.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index bc3f2efa0d86..789d846a9184 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -99,10 +99,6 @@ int armpmu_event_set_period(struct perf_event *event)
 	s64 period = hwc->sample_period;
 	int ret = 0;
 
-	/* The period may have been changed by PERF_EVENT_IOC_PERIOD */
-	if (unlikely(period != hwc->last_period))
-		left = period - (hwc->last_period - left);
-
 	if (unlikely(left <= -period)) {
 		left = period;
 		local64_set(&hwc->period_left, left);
-- 
cgit v1.2.3


From d80512f87474f2dfd67ef931737659acce20fe69 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 16 Dec 2013 14:31:26 +0100
Subject: s390/smp: improve setup of possible cpu mask

Since under z/VM we cannot have more than 64 cpus, make sure the
cpu_possible_mask does not contain more bits.
This avoids wasting memory for dynamic per-cpu allocations if
CONFIG_NR_CPUS is larger than 64.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/Kconfig           |  1 -
 arch/s390/include/asm/smp.h |  2 ++
 arch/s390/kernel/setup.c    |  1 +
 arch/s390/kernel/smp.c      | 25 ++++++++++++++++---------
 4 files changed, 19 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 1e1a03d2d19f..e9f312532526 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -135,7 +135,6 @@ config S390
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_UID16 if 32BIT
 	select HAVE_VIRT_CPU_ACCOUNTING
-	select INIT_ALL_POSSIBLE
 	select KTIME_SCALAR if 32BIT
 	select MODULES_USE_ELF_RELA
 	select OLD_SIGACTION
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index ac9bed8e103f..160779394096 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -31,6 +31,7 @@ extern void smp_yield(void);
 extern void smp_stop_cpu(void);
 extern void smp_cpu_set_polarization(int cpu, int val);
 extern int smp_cpu_get_polarization(int cpu);
+extern void smp_fill_possible_mask(void);
 
 #else /* CONFIG_SMP */
 
@@ -50,6 +51,7 @@ static inline int smp_vcpu_scheduled(int cpu) { return 1; }
 static inline void smp_yield_cpu(int cpu) { }
 static inline void smp_yield(void) { }
 static inline void smp_stop_cpu(void) { }
+static inline void smp_fill_possible_mask(void) { }
 
 #endif /* CONFIG_SMP */
 
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 4444875266ee..0f3d44ecbfc6 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -1023,6 +1023,7 @@ void __init setup_arch(char **cmdline_p)
 	setup_vmcoreinfo();
 	setup_lowcore();
 
+	smp_fill_possible_mask();
         cpu_init();
 	s390_init_cpu_topology();
 
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index dc4a53465060..958704798f4a 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -721,18 +721,14 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 	return 0;
 }
 
-static int __init setup_possible_cpus(char *s)
-{
-	int max, cpu;
+static unsigned int setup_possible_cpus __initdata;
 
-	if (kstrtoint(s, 0, &max) < 0)
-		return 0;
-	init_cpu_possible(cpumask_of(0));
-	for (cpu = 1; cpu < max && cpu < nr_cpu_ids; cpu++)
-		set_cpu_possible(cpu, true);
+static int __init _setup_possible_cpus(char *s)
+{
+	get_option(&s, &setup_possible_cpus);
 	return 0;
 }
-early_param("possible_cpus", setup_possible_cpus);
+early_param("possible_cpus", _setup_possible_cpus);
 
 #ifdef CONFIG_HOTPLUG_CPU
 
@@ -775,6 +771,17 @@ void __noreturn cpu_die(void)
 
 #endif /* CONFIG_HOTPLUG_CPU */
 
+void __init smp_fill_possible_mask(void)
+{
+	unsigned int possible, cpu;
+
+	possible = setup_possible_cpus;
+	if (!possible)
+		possible = MACHINE_IS_VM ? 64 : nr_cpu_ids;
+	for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++)
+		set_cpu_possible(cpu, true);
+}
+
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
 	/* request the 0x1201 emergency signal external interrupt */
-- 
cgit v1.2.3


From 9efe4f2992025c3a4027c60bf36ae9d710ca3781 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Tue, 17 Dec 2013 13:41:31 +0100
Subject: s390/mm: optimize randomize_et_dyn for !PF_RANDOMIZE

Skip the call to brk_rnd() if the PF_RANDOMIZE flag is not set for
the process. This avoids the costly get_random_int() call. Modify
arch_randomize_brk() as well to make it look like randomize_et_dyn().

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/process.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 7ed0d4e2a435..dd145321d215 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -261,20 +261,18 @@ static inline unsigned long brk_rnd(void)
 
 unsigned long arch_randomize_brk(struct mm_struct *mm)
 {
-	unsigned long ret = PAGE_ALIGN(mm->brk + brk_rnd());
+	unsigned long ret;
 
-	if (ret < mm->brk)
-		return mm->brk;
-	return ret;
+	ret = PAGE_ALIGN(mm->brk + brk_rnd());
+	return (ret > mm->brk) ? ret : mm->brk;
 }
 
 unsigned long randomize_et_dyn(unsigned long base)
 {
-	unsigned long ret = PAGE_ALIGN(base + brk_rnd());
+	unsigned long ret;
 
 	if (!(current->flags & PF_RANDOMIZE))
 		return base;
-	if (ret < base)
-		return base;
-	return ret;
+	ret = PAGE_ALIGN(base + brk_rnd());
+	return (ret > base) ? ret : base;
 }
-- 
cgit v1.2.3


From 91f3e3eaba4413e76ce8e12e3ef10525a889142f Mon Sep 17 00:00:00 2001
From: Ingo Tuchscherer <ingo.tuchscherer@de.ibm.com>
Date: Wed, 20 Nov 2013 10:47:13 +0100
Subject: s390/zcrypt: add support for EP11 coprocessor cards

This feature extends the generic cryptographic device driver (zcrypt)
with a new capability to service EP11 requests for the Crypto Express4S
card in EP11 (Enterprise PKCS#11 mode) coprocessor mode.

Signed-off-by: Ingo Tuchscherer <ingo.tuchscherer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 Documentation/kmsg/s390/zcrypt         |  20 +++
 arch/s390/include/uapi/asm/zcrypt.h    |  65 +++++++++
 drivers/s390/crypto/ap_bus.c           |  31 +++-
 drivers/s390/crypto/ap_bus.h           |   4 +-
 drivers/s390/crypto/zcrypt_api.c       | 109 +++++++++++++-
 drivers/s390/crypto/zcrypt_api.h       |   2 +
 drivers/s390/crypto/zcrypt_cex4.c      |  20 ++-
 drivers/s390/crypto/zcrypt_error.h     |  18 ++-
 drivers/s390/crypto/zcrypt_msgtype50.c |  12 ++
 drivers/s390/crypto/zcrypt_msgtype6.c  | 260 +++++++++++++++++++++++++++++++++
 drivers/s390/crypto/zcrypt_msgtype6.h  |   2 +
 drivers/s390/crypto/zcrypt_pcica.c     |  11 ++
 drivers/s390/crypto/zcrypt_pcicc.c     |  12 ++
 13 files changed, 552 insertions(+), 14 deletions(-)
 create mode 100644 Documentation/kmsg/s390/zcrypt

(limited to 'arch')

diff --git a/Documentation/kmsg/s390/zcrypt b/Documentation/kmsg/s390/zcrypt
new file mode 100644
index 000000000000..7fb2087409d6
--- /dev/null
+++ b/Documentation/kmsg/s390/zcrypt
@@ -0,0 +1,20 @@
+/*?
+ * Text: "Cryptographic device %x failed and was set offline\n"
+ * Severity: Error
+ * Parameter:
+ *   @1: device index
+ * Description:
+ * A cryptographic device failed to process a cryptographic request.
+ * The cryptographic device driver could not correct the error and
+ * set the device offline. The application that issued the
+ * request received an indication that the request has failed.
+ * User action:
+ * Use the lszcrypt command to confirm that the cryptographic
+ * hardware is still configured to your LPAR or z/VM guest virtual
+ * machine. If the device is available to your Linux instance the
+ * command output contains a line that begins with 'card<device index>',
+ * where <device index> is the two-digit decimal number in the message text.
+ * After ensuring that the device is available, use the chzcrypt command to
+ * set it online again.
+ * If the error persists, contact your support organization.
+ */
diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h
index e83fc116f5bf..f2b18eacaca8 100644
--- a/arch/s390/include/uapi/asm/zcrypt.h
+++ b/arch/s390/include/uapi/asm/zcrypt.h
@@ -154,6 +154,67 @@ struct ica_xcRB {
 	unsigned short	priority_window;
 	unsigned int	status;
 } __attribute__((packed));
+
+/**
+ * struct ep11_cprb - EP11 connectivity programming request block
+ * @cprb_len:		CPRB header length [0x0020]
+ * @cprb_ver_id:	CPRB version id.   [0x04]
+ * @pad_000:		Alignment pad bytes
+ * @flags:		Admin cmd [0x80] or functional cmd [0x00]
+ * @func_id:		Function id / subtype [0x5434]
+ * @source_id:		Source id [originator id]
+ * @target_id:		Target id [usage/ctrl domain id]
+ * @ret_code:		Return code
+ * @reserved1:		Reserved
+ * @reserved2:		Reserved
+ * @payload_len:	Payload length
+ */
+struct ep11_cprb {
+	uint16_t	cprb_len;
+	unsigned char	cprb_ver_id;
+	unsigned char	pad_000[2];
+	unsigned char	flags;
+	unsigned char	func_id[2];
+	uint32_t	source_id;
+	uint32_t	target_id;
+	uint32_t	ret_code;
+	uint32_t	reserved1;
+	uint32_t	reserved2;
+	uint32_t	payload_len;
+} __attribute__((packed));
+
+/**
+ * struct ep11_target_dev - EP11 target device list
+ * @ap_id:	AP device id
+ * @dom_id:	Usage domain id
+ */
+struct ep11_target_dev {
+	uint16_t ap_id;
+	uint16_t dom_id;
+};
+
+/**
+ * struct ep11_urb - EP11 user request block
+ * @targets_num:	Number of target adapters
+ * @targets:		Addr to target adapter list
+ * @weight:		Level of request priority
+ * @req_no:		Request id/number
+ * @req_len:		Request length
+ * @req:		Addr to request block
+ * @resp_len:		Response length
+ * @resp:		Addr to response block
+ */
+struct ep11_urb {
+	uint16_t		targets_num;
+	uint64_t		targets;
+	uint64_t		weight;
+	uint64_t		req_no;
+	uint64_t		req_len;
+	uint64_t		req;
+	uint64_t		resp_len;
+	uint64_t		resp;
+} __attribute__((packed));
+
 #define AUTOSELECT ((unsigned int)0xFFFFFFFF)
 
 #define ZCRYPT_IOCTL_MAGIC 'z'
@@ -183,6 +244,9 @@ struct ica_xcRB {
  *   ZSECSENDCPRB
  *     Send an arbitrary CPRB to a crypto card.
  *
+ *   ZSENDEP11CPRB
+ *     Send an arbitrary EP11 CPRB to an EP11 coprocessor crypto card.
+ *
  *   Z90STAT_STATUS_MASK
  *     Return an 64 element array of unsigned chars for the status of
  *     all devices.
@@ -256,6 +320,7 @@ struct ica_xcRB {
 #define ICARSAMODEXPO	_IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x05, 0)
 #define ICARSACRT	_IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x06, 0)
 #define ZSECSENDCPRB	_IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x81, 0)
+#define ZSENDEP11CPRB	_IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x04, 0)
 
 /* New status calls */
 #define Z90STAT_TOTALCOUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x40, int)
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index 02300dcfac91..ab3baa7f9508 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -591,7 +591,13 @@ static int ap_init_queue(ap_qid_t qid)
 		if (rc != -ENODEV && rc != -EBUSY)
 			break;
 		if (i < AP_MAX_RESET - 1) {
-			udelay(5);
+			/* Time we are waiting until we give up (0.7sec * 90).
+			 * Since the actual request (in progress) will not
+			 * interrupted immediately for the reset command,
+			 * we have to be patient. In worst case we have to
+			 * wait 60sec + reset time (some msec).
+			 */
+			schedule_timeout(AP_RESET_TIMEOUT);
 			status = ap_test_queue(qid, &dummy, &dummy);
 		}
 	}
@@ -992,6 +998,28 @@ static ssize_t ap_domain_show(struct bus_type *bus, char *buf)
 
 static BUS_ATTR(ap_domain, 0444, ap_domain_show, NULL);
 
+static ssize_t ap_control_domain_mask_show(struct bus_type *bus, char *buf)
+{
+	if (ap_configuration != NULL) { /* QCI not supported */
+		if (test_facility(76)) { /* format 1 - 256 bit domain field */
+			return snprintf(buf, PAGE_SIZE,
+				"0x%08x%08x%08x%08x%08x%08x%08x%08x\n",
+			ap_configuration->adm[0], ap_configuration->adm[1],
+			ap_configuration->adm[2], ap_configuration->adm[3],
+			ap_configuration->adm[4], ap_configuration->adm[5],
+			ap_configuration->adm[6], ap_configuration->adm[7]);
+		} else { /* format 0 - 16 bit domain field */
+			return snprintf(buf, PAGE_SIZE, "%08x%08x\n",
+			ap_configuration->adm[0], ap_configuration->adm[1]);
+		  }
+	} else {
+		return snprintf(buf, PAGE_SIZE, "not supported\n");
+	  }
+}
+
+static BUS_ATTR(ap_control_domain_mask, 0444,
+		ap_control_domain_mask_show, NULL);
+
 static ssize_t ap_config_time_show(struct bus_type *bus, char *buf)
 {
 	return snprintf(buf, PAGE_SIZE, "%d\n", ap_config_time);
@@ -1077,6 +1105,7 @@ static BUS_ATTR(poll_timeout, 0644, poll_timeout_show, poll_timeout_store);
 
 static struct bus_attribute *const ap_bus_attrs[] = {
 	&bus_attr_ap_domain,
+	&bus_attr_ap_control_domain_mask,
 	&bus_attr_config_time,
 	&bus_attr_poll_thread,
 	&bus_attr_ap_interrupts,
diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h
index 685f6cc022f9..6405ae24a7a6 100644
--- a/drivers/s390/crypto/ap_bus.h
+++ b/drivers/s390/crypto/ap_bus.h
@@ -33,7 +33,7 @@
 #define AP_DEVICES 64		/* Number of AP devices. */
 #define AP_DOMAINS 16		/* Number of AP domains. */
 #define AP_MAX_RESET 90		/* Maximum number of resets. */
-#define AP_RESET_TIMEOUT (HZ/2)	/* Time in ticks for reset timeouts. */
+#define AP_RESET_TIMEOUT (HZ*0.7)	/* Time in ticks for reset timeouts. */
 #define AP_CONFIG_TIME 30	/* Time in seconds between AP bus rescans. */
 #define AP_POLL_TIME 1		/* Time in ticks between receive polls. */
 
@@ -125,6 +125,8 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr)
 #define AP_FUNC_CRT4K 2
 #define AP_FUNC_COPRO 3
 #define AP_FUNC_ACCEL 4
+#define AP_FUNC_EP11  5
+#define AP_FUNC_APXA  6
 
 /*
  * AP reset flag states
diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
index 31cfaa556072..4b824b15194f 100644
--- a/drivers/s390/crypto/zcrypt_api.c
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -44,6 +44,8 @@
 #include "zcrypt_debug.h"
 #include "zcrypt_api.h"
 
+#include "zcrypt_msgtype6.h"
+
 /*
  * Module description.
  */
@@ -554,9 +556,9 @@ static long zcrypt_send_cprb(struct ica_xcRB *xcRB)
 	spin_lock_bh(&zcrypt_device_lock);
 	list_for_each_entry(zdev, &zcrypt_device_list, list) {
 		if (!zdev->online || !zdev->ops->send_cprb ||
-		    (xcRB->user_defined != AUTOSELECT &&
-			AP_QID_DEVICE(zdev->ap_dev->qid) != xcRB->user_defined)
-		    )
+		   (zdev->ops->variant == MSGTYPE06_VARIANT_EP11) ||
+		   (xcRB->user_defined != AUTOSELECT &&
+		    AP_QID_DEVICE(zdev->ap_dev->qid) != xcRB->user_defined))
 			continue;
 		zcrypt_device_get(zdev);
 		get_device(&zdev->ap_dev->device);
@@ -581,6 +583,90 @@ static long zcrypt_send_cprb(struct ica_xcRB *xcRB)
 	return -ENODEV;
 }
 
+struct ep11_target_dev_list {
+	unsigned short		targets_num;
+	struct ep11_target_dev	*targets;
+};
+
+static bool is_desired_ep11dev(unsigned int dev_qid,
+			       struct ep11_target_dev_list dev_list)
+{
+	int n;
+
+	for (n = 0; n < dev_list.targets_num; n++, dev_list.targets++) {
+		if ((AP_QID_DEVICE(dev_qid) == dev_list.targets->ap_id) &&
+		    (AP_QID_QUEUE(dev_qid) == dev_list.targets->dom_id)) {
+			return true;
+		}
+	}
+	return false;
+}
+
+static long zcrypt_send_ep11_cprb(struct ep11_urb *xcrb)
+{
+	struct zcrypt_device *zdev;
+	bool autoselect = false;
+	int rc;
+	struct ep11_target_dev_list ep11_dev_list = {
+		.targets_num	=  0x00,
+		.targets	=  NULL,
+	};
+
+	ep11_dev_list.targets_num = (unsigned short) xcrb->targets_num;
+
+	/* empty list indicates autoselect (all available targets) */
+	if (ep11_dev_list.targets_num == 0)
+		autoselect = true;
+	else {
+		ep11_dev_list.targets = kcalloc((unsigned short)
+						xcrb->targets_num,
+						sizeof(struct ep11_target_dev),
+						GFP_KERNEL);
+		if (!ep11_dev_list.targets)
+			return -ENOMEM;
+
+		if (copy_from_user(ep11_dev_list.targets,
+				   (struct ep11_target_dev *)xcrb->targets,
+				   xcrb->targets_num *
+				   sizeof(struct ep11_target_dev)))
+			return -EFAULT;
+	}
+
+	spin_lock_bh(&zcrypt_device_lock);
+	list_for_each_entry(zdev, &zcrypt_device_list, list) {
+		/* check if device is eligible */
+		if (!zdev->online ||
+		    zdev->ops->variant != MSGTYPE06_VARIANT_EP11)
+			continue;
+
+		/* check if device is selected as valid target */
+		if (!is_desired_ep11dev(zdev->ap_dev->qid, ep11_dev_list) &&
+		    !autoselect)
+			continue;
+
+		zcrypt_device_get(zdev);
+		get_device(&zdev->ap_dev->device);
+		zdev->request_count++;
+		__zcrypt_decrease_preference(zdev);
+		if (try_module_get(zdev->ap_dev->drv->driver.owner)) {
+			spin_unlock_bh(&zcrypt_device_lock);
+			rc = zdev->ops->send_ep11_cprb(zdev, xcrb);
+			spin_lock_bh(&zcrypt_device_lock);
+			module_put(zdev->ap_dev->drv->driver.owner);
+		} else {
+			rc = -EAGAIN;
+		  }
+		zdev->request_count--;
+		__zcrypt_increase_preference(zdev);
+		put_device(&zdev->ap_dev->device);
+		zcrypt_device_put(zdev);
+		spin_unlock_bh(&zcrypt_device_lock);
+		return rc;
+	}
+	spin_unlock_bh(&zcrypt_device_lock);
+	return -ENODEV;
+}
+
 static long zcrypt_rng(char *buffer)
 {
 	struct zcrypt_device *zdev;
@@ -784,6 +870,23 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd,
 			return -EFAULT;
 		return rc;
 	}
+	case ZSENDEP11CPRB: {
+		struct ep11_urb __user *uxcrb = (void __user *)arg;
+		struct ep11_urb xcrb;
+		if (copy_from_user(&xcrb, uxcrb, sizeof(xcrb)))
+			return -EFAULT;
+		do {
+			rc = zcrypt_send_ep11_cprb(&xcrb);
+		} while (rc == -EAGAIN);
+		/* on failure: retry once again after a requested rescan */
+		if ((rc == -ENODEV) && (zcrypt_process_rescan()))
+			do {
+				rc = zcrypt_send_ep11_cprb(&xcrb);
+			} while (rc == -EAGAIN);
+		if (copy_to_user(uxcrb, &xcrb, sizeof(xcrb)))
+			return -EFAULT;
+		return rc;
+	}
 	case Z90STAT_STATUS_MASK: {
 		char status[AP_DEVICES];
 		zcrypt_status_mask(status);
diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h
index 89632919c993..b3d496bfaa7e 100644
--- a/drivers/s390/crypto/zcrypt_api.h
+++ b/drivers/s390/crypto/zcrypt_api.h
@@ -74,6 +74,7 @@ struct ica_z90_status {
 #define ZCRYPT_CEX2A		6
 #define ZCRYPT_CEX3C		7
 #define ZCRYPT_CEX3A		8
+#define ZCRYPT_CEX4	       10
 
 /**
  * Large random numbers are pulled in 4096 byte chunks from the crypto cards
@@ -89,6 +90,7 @@ struct zcrypt_ops {
 	long (*rsa_modexpo_crt)(struct zcrypt_device *,
 				struct ica_rsa_modexpo_crt *);
 	long (*send_cprb)(struct zcrypt_device *, struct ica_xcRB *);
+	long (*send_ep11_cprb)(struct zcrypt_device *, struct ep11_urb *);
 	long (*rng)(struct zcrypt_device *, char *);
 	struct list_head list;		/* zcrypt ops list. */
 	struct module *owner;
diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c
index ce1226398ac9..569f8b1d86c0 100644
--- a/drivers/s390/crypto/zcrypt_cex4.c
+++ b/drivers/s390/crypto/zcrypt_cex4.c
@@ -30,7 +30,12 @@
 #define CEX4A_MAX_MESSAGE_SIZE	MSGTYPE50_CRB3_MAX_MSG_SIZE
 #define CEX4C_MAX_MESSAGE_SIZE	MSGTYPE06_MAX_MSG_SIZE
 
-#define CEX4_CLEANUP_TIME	(15*HZ)
+/* Waiting time for requests to be processed.
+ * Currently there are some types of request which are not deterministic.
+ * But the maximum time limit managed by the stomper code is set to 60sec.
+ * Hence we have to wait at least that time period.
+ */
+#define CEX4_CLEANUP_TIME	(61*HZ)
 
 static struct ap_device_id zcrypt_cex4_ids[] = {
 	{ AP_DEVICE(AP_DEVICE_TYPE_CEX4)  },
@@ -101,6 +106,19 @@ static int zcrypt_cex4_probe(struct ap_device *ap_dev)
 			zdev->speed_rating = CEX4C_SPEED_RATING;
 			zdev->ops = zcrypt_msgtype_request(MSGTYPE06_NAME,
 							   MSGTYPE06_VARIANT_DEFAULT);
+		} else if (ap_test_bit(&ap_dev->functions, AP_FUNC_EP11)) {
+			zdev = zcrypt_device_alloc(CEX4C_MAX_MESSAGE_SIZE);
+			if (!zdev)
+				return -ENOMEM;
+			zdev->type_string = "CEX4P";
+			zdev->user_space_type = ZCRYPT_CEX4;
+			zdev->min_mod_size = CEX4C_MIN_MOD_SIZE;
+			zdev->max_mod_size = CEX4C_MAX_MOD_SIZE;
+			zdev->max_exp_bit_length = CEX4C_MAX_MOD_SIZE;
+			zdev->short_crt = 0;
+			zdev->speed_rating = CEX4C_SPEED_RATING;
+			zdev->ops = zcrypt_msgtype_request(MSGTYPE06_NAME,
+							MSGTYPE06_VARIANT_EP11);
 		}
 		break;
 	}
diff --git a/drivers/s390/crypto/zcrypt_error.h b/drivers/s390/crypto/zcrypt_error.h
index 0079b6617211..7b23f43c7b08 100644
--- a/drivers/s390/crypto/zcrypt_error.h
+++ b/drivers/s390/crypto/zcrypt_error.h
@@ -106,15 +106,15 @@ static inline int convert_error(struct zcrypt_device *zdev,
 	//   REP88_ERROR_MESSAGE_TYPE		// '20' CEX2A
 		/*
 		 * To sent a message of the wrong type is a bug in the
-		 * device driver. Warn about it, disable the device
+		 * device driver. Send error msg, disable the device
 		 * and then repeat the request.
 		 */
-		WARN_ON(1);
 		atomic_set(&zcrypt_rescan_req, 1);
 		zdev->online = 0;
+		pr_err("Cryptographic device %x failed and was set offline\n",
+		       zdev->ap_dev->qid);
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-			       zdev->ap_dev->qid,
-			       zdev->online, ehdr->reply_code);
+			zdev->ap_dev->qid, zdev->online, ehdr->reply_code);
 		return -EAGAIN;
 	case REP82_ERROR_TRANSPORT_FAIL:
 	case REP82_ERROR_MACHINE_FAILURE:
@@ -122,15 +122,17 @@ static inline int convert_error(struct zcrypt_device *zdev,
 		/* If a card fails disable it and repeat the request. */
 		atomic_set(&zcrypt_rescan_req, 1);
 		zdev->online = 0;
+		pr_err("Cryptographic device %x failed and was set offline\n",
+		       zdev->ap_dev->qid);
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-			       zdev->ap_dev->qid,
-			       zdev->online, ehdr->reply_code);
+			zdev->ap_dev->qid, zdev->online, ehdr->reply_code);
 		return -EAGAIN;
 	default:
 		zdev->online = 0;
+		pr_err("Cryptographic device %x failed and was set offline\n",
+		       zdev->ap_dev->qid);
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-			       zdev->ap_dev->qid,
-			       zdev->online, ehdr->reply_code);
+			zdev->ap_dev->qid, zdev->online, ehdr->reply_code);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c
index 7c522f338bda..334e282f255b 100644
--- a/drivers/s390/crypto/zcrypt_msgtype50.c
+++ b/drivers/s390/crypto/zcrypt_msgtype50.c
@@ -25,6 +25,9 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#define KMSG_COMPONENT "zcrypt"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/init.h>
@@ -332,6 +335,11 @@ static int convert_type80(struct zcrypt_device *zdev,
 	if (t80h->len < sizeof(*t80h) + outputdatalength) {
 		/* The result is too short, the CEX2A card may not do that.. */
 		zdev->online = 0;
+		pr_err("Cryptographic device %x failed and was set offline\n",
+		       zdev->ap_dev->qid);
+		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
+			       zdev->ap_dev->qid, zdev->online, t80h->code);
+
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 	if (zdev->user_space_type == ZCRYPT_CEX2A)
@@ -359,6 +367,10 @@ static int convert_response(struct zcrypt_device *zdev,
 				      outputdata, outputdatalength);
 	default: /* Unknown response type, this should NEVER EVER happen */
 		zdev->online = 0;
+		pr_err("Cryptographic device %x failed and was set offline\n",
+		       zdev->ap_dev->qid);
+		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
+			       zdev->ap_dev->qid, zdev->online);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c
index 7d97fa5a26d0..57bfda1bd71a 100644
--- a/drivers/s390/crypto/zcrypt_msgtype6.c
+++ b/drivers/s390/crypto/zcrypt_msgtype6.c
@@ -25,6 +25,9 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#define KMSG_COMPONENT "zcrypt"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/err.h>
@@ -50,6 +53,7 @@ struct response_type {
 };
 #define PCIXCC_RESPONSE_TYPE_ICA  0
 #define PCIXCC_RESPONSE_TYPE_XCRB 1
+#define PCIXCC_RESPONSE_TYPE_EP11 2
 
 MODULE_AUTHOR("IBM Corporation");
 MODULE_DESCRIPTION("Cryptographic Coprocessor (message type 6), " \
@@ -358,6 +362,91 @@ static int XCRB_msg_to_type6CPRB_msgX(struct zcrypt_device *zdev,
 	return 0;
 }
 
+static int xcrb_msg_to_type6_ep11cprb_msgx(struct zcrypt_device *zdev,
+				       struct ap_message *ap_msg,
+				       struct ep11_urb *xcRB)
+{
+	unsigned int lfmt;
+
+	static struct type6_hdr static_type6_ep11_hdr = {
+		.type		=  0x06,
+		.rqid		= {0x00, 0x01},
+		.function_code	= {0x00, 0x00},
+		.agent_id[0]	=  0x58,	/* {'X'} */
+		.agent_id[1]	=  0x43,	/* {'C'} */
+		.offset1	=  0x00000058,
+	};
+
+	struct {
+		struct type6_hdr hdr;
+		struct ep11_cprb cprbx;
+		unsigned char	pld_tag;	/* fixed value 0x30 */
+		unsigned char	pld_lenfmt;	/* payload length format */
+	} __packed * msg = ap_msg->message;
+
+	struct pld_hdr {
+		unsigned char	func_tag;	/* fixed value 0x4 */
+		unsigned char	func_len;	/* fixed value 0x4 */
+		unsigned int	func_val;	/* function ID	   */
+		unsigned char	dom_tag;	/* fixed value 0x4 */
+		unsigned char	dom_len;	/* fixed value 0x4 */
+		unsigned int	dom_val;	/* domain id	   */
+	} __packed * payload_hdr;
+
+	/* length checks */
+	ap_msg->length = sizeof(struct type6_hdr) + xcRB->req_len;
+	if (CEIL4(xcRB->req_len) > MSGTYPE06_MAX_MSG_SIZE -
+				   (sizeof(struct type6_hdr)))
+		return -EINVAL;
+
+	if (CEIL4(xcRB->resp_len) > MSGTYPE06_MAX_MSG_SIZE -
+				    (sizeof(struct type86_fmt2_msg)))
+		return -EINVAL;
+
+	/* prepare type6 header */
+	msg->hdr = static_type6_ep11_hdr;
+	msg->hdr.ToCardLen1   = xcRB->req_len;
+	msg->hdr.FromCardLen1 = xcRB->resp_len;
+
+	/* Import CPRB data from the ioctl input parameter */
+	if (copy_from_user(&(msg->cprbx.cprb_len),
+			   (char *)xcRB->req, xcRB->req_len)) {
+		return -EFAULT;
+	}
+
+	/*
+	 The target domain field within the cprb body/payload block will be
+	 replaced by the usage domain for non-management commands only.
+	 Therefore we check the first bit of the 'flags' parameter for
+	 management command indication.
+	   0 - non managment command
+	   1 - management command
+	*/
+	if (!((msg->cprbx.flags & 0x80) == 0x80)) {
+		msg->cprbx.target_id = (unsigned int)
+					AP_QID_QUEUE(zdev->ap_dev->qid);
+
+		if ((msg->pld_lenfmt & 0x80) == 0x80) { /*ext.len.fmt 2 or 3*/
+			switch (msg->pld_lenfmt & 0x03) {
+			case 1:
+				lfmt = 2;
+				break;
+			case 2:
+				lfmt = 3;
+				break;
+			default:
+				return -EINVAL;
+			}
+		} else {
+			lfmt = 1; /* length format #1 */
+		  }
+		payload_hdr = (struct pld_hdr *)((&(msg->pld_lenfmt))+lfmt);
+		payload_hdr->dom_val = (unsigned int)
+					AP_QID_QUEUE(zdev->ap_dev->qid);
+	}
+	return 0;
+}
+
 /**
  * Copy results from a type 86 ICA reply message back to user space.
  *
@@ -377,6 +466,12 @@ struct type86x_reply {
 	char text[0];
 } __packed;
 
+struct type86_ep11_reply {
+	struct type86_hdr hdr;
+	struct type86_fmt2_ext fmt2;
+	struct ep11_cprb cprbx;
+} __packed;
+
 static int convert_type86_ica(struct zcrypt_device *zdev,
 			  struct ap_message *reply,
 			  char __user *outputdata,
@@ -440,6 +535,11 @@ static int convert_type86_ica(struct zcrypt_device *zdev,
 		if (service_rc == 8 && service_rs == 72)
 			return -EINVAL;
 		zdev->online = 0;
+		pr_err("Cryptographic device %x failed and was set offline\n",
+		       zdev->ap_dev->qid);
+		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
+			       zdev->ap_dev->qid, zdev->online,
+			       msg->hdr.reply_code);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 	data = msg->text;
@@ -503,6 +603,33 @@ static int convert_type86_xcrb(struct zcrypt_device *zdev,
 	return 0;
 }
 
+/**
+ * Copy results from a type 86 EP11 XCRB reply message back to user space.
+ *
+ * @zdev: crypto device pointer
+ * @reply: reply AP message.
+ * @xcRB: pointer to EP11 user request block
+ *
+ * Returns 0 on success or -EINVAL, -EFAULT, -EAGAIN in case of an error.
+ */
+static int convert_type86_ep11_xcrb(struct zcrypt_device *zdev,
+				    struct ap_message *reply,
+				    struct ep11_urb *xcRB)
+{
+	struct type86_fmt2_msg *msg = reply->message;
+	char *data = reply->message;
+
+	if (xcRB->resp_len < msg->fmt2.count1)
+		return -EINVAL;
+
+	/* Copy response CPRB to user */
+	if (copy_to_user((char *)xcRB->resp,
+			 data + msg->fmt2.offset1, msg->fmt2.count1))
+		return -EFAULT;
+	xcRB->resp_len = msg->fmt2.count1;
+	return 0;
+}
+
 static int convert_type86_rng(struct zcrypt_device *zdev,
 			  struct ap_message *reply,
 			  char *buffer)
@@ -551,6 +678,10 @@ static int convert_response_ica(struct zcrypt_device *zdev,
 		 * response */
 	default: /* Unknown response type, this should NEVER EVER happen */
 		zdev->online = 0;
+		pr_err("Cryptographic device %x failed and was set offline\n",
+		       zdev->ap_dev->qid);
+		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
+			       zdev->ap_dev->qid, zdev->online);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
@@ -579,10 +710,40 @@ static int convert_response_xcrb(struct zcrypt_device *zdev,
 	default: /* Unknown response type, this should NEVER EVER happen */
 		xcRB->status = 0x0008044DL; /* HDD_InvalidParm */
 		zdev->online = 0;
+		pr_err("Cryptographic device %x failed and was set offline\n",
+		       zdev->ap_dev->qid);
+		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
+			       zdev->ap_dev->qid, zdev->online);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
 
+static int convert_response_ep11_xcrb(struct zcrypt_device *zdev,
+	struct ap_message *reply, struct ep11_urb *xcRB)
+{
+	struct type86_ep11_reply *msg = reply->message;
+
+	/* Response type byte is the second byte in the response. */
+	switch (((unsigned char *)reply->message)[1]) {
+	case TYPE82_RSP_CODE:
+	case TYPE87_RSP_CODE:
+		return convert_error(zdev, reply);
+	case TYPE86_RSP_CODE:
+		if (msg->hdr.reply_code)
+			return convert_error(zdev, reply);
+		if (msg->cprbx.cprb_ver_id == 0x04)
+			return convert_type86_ep11_xcrb(zdev, reply, xcRB);
+	/* Fall through, no break, incorrect cprb version is an unknown resp.*/
+	default: /* Unknown response type, this should NEVER EVER happen */
+		zdev->online = 0;
+		pr_err("Cryptographic device %x failed and was set offline\n",
+		       zdev->ap_dev->qid);
+		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
+			       zdev->ap_dev->qid, zdev->online);
+		return -EAGAIN; /* repeat the request on a different device. */
+	}
+}
+
 static int convert_response_rng(struct zcrypt_device *zdev,
 				 struct ap_message *reply,
 				 char *data)
@@ -602,6 +763,10 @@ static int convert_response_rng(struct zcrypt_device *zdev,
 		 * response */
 	default: /* Unknown response type, this should NEVER EVER happen */
 		zdev->online = 0;
+		pr_err("Cryptographic device %x failed and was set offline\n",
+		       zdev->ap_dev->qid);
+		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
+			       zdev->ap_dev->qid, zdev->online);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
@@ -657,6 +822,51 @@ out:
 	complete(&(resp_type->work));
 }
 
+/**
+ * This function is called from the AP bus code after a crypto request
+ * "msg" has finished with the reply message "reply".
+ * It is called from tasklet context.
+ * @ap_dev: pointer to the AP device
+ * @msg: pointer to the AP message
+ * @reply: pointer to the AP reply message
+ */
+static void zcrypt_msgtype6_receive_ep11(struct ap_device *ap_dev,
+					 struct ap_message *msg,
+					 struct ap_message *reply)
+{
+	static struct error_hdr error_reply = {
+		.type = TYPE82_RSP_CODE,
+		.reply_code = REP82_ERROR_MACHINE_FAILURE,
+	};
+	struct response_type *resp_type =
+		(struct response_type *)msg->private;
+	struct type86_ep11_reply *t86r;
+	int length;
+
+	/* Copy the reply message to the request message buffer. */
+	if (IS_ERR(reply)) {
+		memcpy(msg->message, &error_reply, sizeof(error_reply));
+		goto out;
+	}
+	t86r = reply->message;
+	if (t86r->hdr.type == TYPE86_RSP_CODE &&
+	    t86r->cprbx.cprb_ver_id == 0x04) {
+		switch (resp_type->type) {
+		case PCIXCC_RESPONSE_TYPE_EP11:
+			length = t86r->fmt2.offset1 + t86r->fmt2.count1;
+			length = min(MSGTYPE06_MAX_MSG_SIZE, length);
+			memcpy(msg->message, reply->message, length);
+			break;
+		default:
+			memcpy(msg->message, &error_reply, sizeof(error_reply));
+		}
+	} else {
+		memcpy(msg->message, reply->message, sizeof(error_reply));
+	  }
+out:
+	complete(&(resp_type->work));
+}
+
 static atomic_t zcrypt_step = ATOMIC_INIT(0);
 
 /**
@@ -781,6 +991,46 @@ out_free:
 	return rc;
 }
 
+/**
+ * The request distributor calls this function if it picked the CEX4P
+ * device to handle a send_ep11_cprb request.
+ * @zdev: pointer to zcrypt_device structure that identifies the
+ *	  CEX4P device to the request distributor
+ * @xcRB: pointer to the ep11 user request block
+ */
+static long zcrypt_msgtype6_send_ep11_cprb(struct zcrypt_device *zdev,
+						struct ep11_urb *xcrb)
+{
+	struct ap_message ap_msg;
+	struct response_type resp_type = {
+		.type = PCIXCC_RESPONSE_TYPE_EP11,
+	};
+	int rc;
+
+	ap_init_message(&ap_msg);
+	ap_msg.message = kmalloc(MSGTYPE06_MAX_MSG_SIZE, GFP_KERNEL);
+	if (!ap_msg.message)
+		return -ENOMEM;
+	ap_msg.receive = zcrypt_msgtype6_receive_ep11;
+	ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
+				atomic_inc_return(&zcrypt_step);
+	ap_msg.private = &resp_type;
+	rc = xcrb_msg_to_type6_ep11cprb_msgx(zdev, &ap_msg, xcrb);
+	if (rc)
+		goto out_free;
+	init_completion(&resp_type.work);
+	ap_queue_message(zdev->ap_dev, &ap_msg);
+	rc = wait_for_completion_interruptible(&resp_type.work);
+	if (rc == 0)
+		rc = convert_response_ep11_xcrb(zdev, &ap_msg, xcrb);
+	else /* Signal pending. */
+		ap_cancel_message(zdev->ap_dev, &ap_msg);
+
+out_free:
+	kzfree(ap_msg.message);
+	return rc;
+}
+
 /**
  * The request distributor calls this function if it picked the PCIXCC/CEX2C
  * device to generate random data.
@@ -839,10 +1089,19 @@ static struct zcrypt_ops zcrypt_msgtype6_ops = {
 	.rng = zcrypt_msgtype6_rng,
 };
 
+static struct zcrypt_ops zcrypt_msgtype6_ep11_ops = {
+	.owner = THIS_MODULE,
+	.variant = MSGTYPE06_VARIANT_EP11,
+	.rsa_modexpo = NULL,
+	.rsa_modexpo_crt = NULL,
+	.send_ep11_cprb = zcrypt_msgtype6_send_ep11_cprb,
+};
+
 int __init zcrypt_msgtype6_init(void)
 {
 	zcrypt_msgtype_register(&zcrypt_msgtype6_norng_ops);
 	zcrypt_msgtype_register(&zcrypt_msgtype6_ops);
+	zcrypt_msgtype_register(&zcrypt_msgtype6_ep11_ops);
 	return 0;
 }
 
@@ -850,6 +1109,7 @@ void __exit zcrypt_msgtype6_exit(void)
 {
 	zcrypt_msgtype_unregister(&zcrypt_msgtype6_norng_ops);
 	zcrypt_msgtype_unregister(&zcrypt_msgtype6_ops);
+	zcrypt_msgtype_unregister(&zcrypt_msgtype6_ep11_ops);
 }
 
 module_init(zcrypt_msgtype6_init);
diff --git a/drivers/s390/crypto/zcrypt_msgtype6.h b/drivers/s390/crypto/zcrypt_msgtype6.h
index 1e500d3c0735..207247570623 100644
--- a/drivers/s390/crypto/zcrypt_msgtype6.h
+++ b/drivers/s390/crypto/zcrypt_msgtype6.h
@@ -32,6 +32,7 @@
 #define MSGTYPE06_NAME			"zcrypt_msgtype6"
 #define MSGTYPE06_VARIANT_DEFAULT	0
 #define MSGTYPE06_VARIANT_NORNG		1
+#define MSGTYPE06_VARIANT_EP11		2
 
 #define MSGTYPE06_MAX_MSG_SIZE		(12*1024)
 
@@ -99,6 +100,7 @@ struct type86_hdr {
 } __packed;
 
 #define TYPE86_RSP_CODE 0x86
+#define TYPE87_RSP_CODE 0x87
 #define TYPE86_FMT2	0x02
 
 struct type86_fmt2_ext {
diff --git a/drivers/s390/crypto/zcrypt_pcica.c b/drivers/s390/crypto/zcrypt_pcica.c
index f2b71d8df01f..7a743f4c646c 100644
--- a/drivers/s390/crypto/zcrypt_pcica.c
+++ b/drivers/s390/crypto/zcrypt_pcica.c
@@ -24,6 +24,9 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#define KMSG_COMPONENT "zcrypt"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/init.h>
@@ -199,6 +202,10 @@ static int convert_type84(struct zcrypt_device *zdev,
 	if (t84h->len < sizeof(*t84h) + outputdatalength) {
 		/* The result is too short, the PCICA card may not do that.. */
 		zdev->online = 0;
+		pr_err("Cryptographic device %x failed and was set offline\n",
+		       zdev->ap_dev->qid);
+		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
+			       zdev->ap_dev->qid, zdev->online, t84h->code);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 	BUG_ON(t84h->len > PCICA_MAX_RESPONSE_SIZE);
@@ -223,6 +230,10 @@ static int convert_response(struct zcrypt_device *zdev,
 				      outputdata, outputdatalength);
 	default: /* Unknown response type, this should NEVER EVER happen */
 		zdev->online = 0;
+		pr_err("Cryptographic device %x failed and was set offline\n",
+		       zdev->ap_dev->qid);
+		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
+			       zdev->ap_dev->qid, zdev->online);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
diff --git a/drivers/s390/crypto/zcrypt_pcicc.c b/drivers/s390/crypto/zcrypt_pcicc.c
index 0d90a4334055..4d14c04b746e 100644
--- a/drivers/s390/crypto/zcrypt_pcicc.c
+++ b/drivers/s390/crypto/zcrypt_pcicc.c
@@ -24,6 +24,9 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#define KMSG_COMPONENT "zcrypt"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/gfp.h>
@@ -372,6 +375,11 @@ static int convert_type86(struct zcrypt_device *zdev,
 		if (service_rc == 8 && service_rs == 72)
 			return -EINVAL;
 		zdev->online = 0;
+		pr_err("Cryptographic device %x failed and was set offline\n",
+		       zdev->ap_dev->qid);
+		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
+			       zdev->ap_dev->qid, zdev->online,
+			       msg->hdr.reply_code);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 	data = msg->text;
@@ -425,6 +433,10 @@ static int convert_response(struct zcrypt_device *zdev,
 		/* no break, incorrect cprb version is an unknown response */
 	default: /* Unknown response type, this should NEVER EVER happen */
 		zdev->online = 0;
+		pr_err("Cryptographic device %x failed and was set offline\n",
+		       zdev->ap_dev->qid);
+		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
+			       zdev->ap_dev->qid, zdev->online);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
-- 
cgit v1.2.3


From bbca4d39175a96b882def1628e8b532998750dfa Mon Sep 17 00:00:00 2001
From: Gerhard Sittig <gsi@denx.de>
Date: Tue, 10 Dec 2013 10:51:08 +0100
Subject: powerpc/512x: dts: remove misplaced IRQ spec from 'soc' node (5125)

the 'soc' node in the MPC5125 "tower" board .dts has an '#interrupt-cells'
property although this node is not an interrupt controller

remove this erroneously placed property because starting with v3.13-rc1
lookup and resolution of 'interrupts' specs for peripherals gets misled
(tries to use the 'soc' as the interrupt parent which fails), emits
'no irq domain found' WARN() messages and breaks the boot process

[ best viewed with 'git diff -U5' to have DT node names in the context ]

Cc: Anatolij Gustschin <agust@denx.de>
Cc: linuxppc-dev@lists.ozlabs.org
Cc: devicetree@vger.kernel.org
Signed-off-by: Gerhard Sittig <gsi@denx.de>
Signed-off-by: Anatolij Gustschin <agust@denx.de>
---
 arch/powerpc/boot/dts/mpc5125twr.dts | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/boot/dts/mpc5125twr.dts b/arch/powerpc/boot/dts/mpc5125twr.dts
index 4177b62240c2..0a0fe92216ae 100644
--- a/arch/powerpc/boot/dts/mpc5125twr.dts
+++ b/arch/powerpc/boot/dts/mpc5125twr.dts
@@ -58,7 +58,6 @@
 		compatible = "fsl,mpc5121-immr";
 		#address-cells = <1>;
 		#size-cells = <1>;
-		#interrupt-cells = <2>;
 		ranges = <0x0 0x80000000 0x400000>;
 		reg = <0x80000000 0x400000>;
 		bus-frequency = <66000000>;	// 66 MHz ips bus
-- 
cgit v1.2.3


From 7306006f103567fcf595d23dd741da0b8642f4c5 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Wed, 18 Dec 2013 15:52:13 -0800
Subject: x86, realmode: Pointer walk cleanups, pull out invariant use of
 __pa()

The pointer arithmetic in this function was really bizarre, where in
fact all we really wanted was a simple pointer array walk.  Use the
much more idiomatic construction for that (*ptr++).

Factor an invariant use of __pa() out of the relocation loop.  At
least on 64 bits it seems gcc isn't capable of doing that
automatically.

Change the scope of a couple of variables to make it extra obvious
that they are extremely local temp variables.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Link: http://lkml.kernel.org/n/tip-rd908t9c8kvcojdabtmm94mb@git.kernel.org
---
 arch/x86/realmode/init.c | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index a44f457e70a1..bad628a620c4 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -29,12 +29,10 @@ void __init reserve_real_mode(void)
 void __init setup_real_mode(void)
 {
 	u16 real_mode_seg;
-	u32 *rel;
+	const u32 *rel;
 	u32 count;
-	u32 *ptr;
-	u16 *seg;
-	int i;
 	unsigned char *base;
+	unsigned long phys_base;
 	struct trampoline_header *trampoline_header;
 	size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
 #ifdef CONFIG_X86_64
@@ -46,23 +44,23 @@ void __init setup_real_mode(void)
 
 	memcpy(base, real_mode_blob, size);
 
-	real_mode_seg = __pa(base) >> 4;
+	phys_base = __pa(base);
+	real_mode_seg = phys_base >> 4;
+
 	rel = (u32 *) real_mode_relocs;
 
 	/* 16-bit segment relocations. */
-	count = rel[0];
-	rel = &rel[1];
-	for (i = 0; i < count; i++) {
-		seg = (u16 *) (base + rel[i]);
+	count = *rel++;
+	while (count--) {
+		u16 *seg = (u16 *) (base + *rel++);
 		*seg = real_mode_seg;
 	}
 
 	/* 32-bit linear relocations. */
-	count = rel[i];
-	rel =  &rel[i + 1];
-	for (i = 0; i < count; i++) {
-		ptr = (u32 *) (base + rel[i]);
-		*ptr += __pa(base);
+	count = *rel++;
+	while (count--) {
+		u32 *ptr = (u32 *) (base + *rel++);
+		*ptr += phys_base;
 	}
 
 	/* Must be perfomed *after* relocation. */
-- 
cgit v1.2.3


From 40979d327c99b20a938b2672d7e506f8fc41ddcc Mon Sep 17 00:00:00 2001
From: Chen Gang <gang.chen.5i5j@gmail.com>
Date: Thu, 19 Dec 2013 16:03:40 +0800
Subject: metag: topology: export 'cpu_core_map'

We need to export 'cpu_core_map' since the topology_core_cpumask macro
refers to it and is used by certain kernel modules.

Found in allmodconfig build:
  ERROR: "cpu_core_map" [drivers/staging/lustre/lustre/libcfs/libcfs.ko] undefined!

Signed-off-by: Chen Gang <gang.chen.5i5j@gmail.com>
Signed-off-by: James Hogan <james.hogan@imgtec.com>
---
 arch/metag/kernel/topology.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/metag/kernel/topology.c b/arch/metag/kernel/topology.c
index bec3dec4922e..4ba595701f7d 100644
--- a/arch/metag/kernel/topology.c
+++ b/arch/metag/kernel/topology.c
@@ -19,6 +19,7 @@
 DEFINE_PER_CPU(struct cpuinfo_metag, cpu_data);
 
 cpumask_t cpu_core_map[NR_CPUS];
+EXPORT_SYMBOL(cpu_core_map);
 
 static cpumask_t cpu_coregroup_map(unsigned int cpu)
 {
-- 
cgit v1.2.3


From f3d815cb854b2f6262ade56a4d91a1ed3f1e50c4 Mon Sep 17 00:00:00 2001
From: Lans Zhang <jia.zhang@windriver.com>
Date: Fri, 6 Dec 2013 12:18:30 +0800
Subject: x86/mm/numa: Fix 32-bit kernel NUMA boot

When booting a 32-bit x86 kernel on a NUMA machine, node data
cannot be allocated from local node if the account of memory for
node 0 covers the low memory space entirely:

  [    0.000000] Initmem setup node 0 [mem 0x00000000-0x83fffffff]
  [    0.000000]   NODE_DATA [mem 0x367ed000-0x367edfff]
  [    0.000000] Initmem setup node 1 [mem 0x840000000-0xfffffffff]
  [    0.000000] Cannot find 4096 bytes in node 1
  [    0.000000] 64664MB HIGHMEM available.
  [    0.000000] 871MB LOWMEM available.

To fix this issue, node data is allowed to be allocated from
other nodes if the memory of local node is still not mapped. The
expected result looks like this:

  [    0.000000] Initmem setup node 0 [mem 0x00000000-0x83fffffff]
  [    0.000000]   NODE_DATA [mem 0x367ed000-0x367edfff]
  [    0.000000] Initmem setup node 1 [mem 0x840000000-0xfffffffff]
  [    0.000000]   NODE_DATA [mem 0x367ec000-0x367ecfff]
  [    0.000000]     NODE_DATA(1) on node 0
  [    0.000000] 64664MB HIGHMEM available.
  [    0.000000] 871MB LOWMEM available.

Signed-off-by: Lans Zhang <jia.zhang@windriver.com>
Cc: <andi@firstfloor.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/1386303510-18574-1-git-send-email-jia.zhang@windriver.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/mm/numa.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 24aec58d6afd..c85da7bb6b60 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -211,9 +211,13 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
 	 */
 	nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
 	if (!nd_pa) {
-		pr_err("Cannot find %zu bytes in node %d\n",
-		       nd_size, nid);
-		return;
+		nd_pa = __memblock_alloc_base(nd_size, SMP_CACHE_BYTES,
+					      MEMBLOCK_ALLOC_ACCESSIBLE);
+		if (!nd_pa) {
+			pr_err("Cannot find %zu bytes in node %d\n",
+			       nd_size, nid);
+			return;
+		}
 	}
 	nd = __va(nd_pa);
 
-- 
cgit v1.2.3


From 16824255394f55adf31b9a96a9965d8c15bdac4c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 12 Dec 2013 15:08:36 +0100
Subject: x86, acpi, idle: Restructure the mwait idle routines

People seem to delight in writing wrong and broken mwait idle routines;
collapse the lot.

This leaves mwait_play_dead() the sole remaining user of __mwait() and
new __mwait() users are probably doing it wrong.

Also remove __sti_mwait() as its unused.

Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jacob Jun Pan <jacob.jun.pan@linux.intel.com>
Cc: Mike Galbraith <bitbucket@online.de>
Cc: Len Brown <lenb@kernel.org>
Cc: Rui Zhang <rui.zhang@intel.com>
Acked-by: Rafael Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131212141654.616820819@infradead.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/mwait.h       | 40 ++++++++++++++++++++++++++++++++++++++
 arch/x86/include/asm/processor.h   | 23 ----------------------
 arch/x86/kernel/acpi/cstate.c      | 23 ----------------------
 drivers/acpi/acpi_pad.c            |  5 +----
 drivers/acpi/processor_idle.c      | 15 --------------
 drivers/idle/intel_idle.c          | 11 +----------
 drivers/thermal/intel_powerclamp.c |  4 +---
 7 files changed, 43 insertions(+), 78 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index 2f366d0ac6b4..361b02ef128c 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_X86_MWAIT_H
 #define _ASM_X86_MWAIT_H
 
+#include <linux/sched.h>
+
 #define MWAIT_SUBSTATE_MASK		0xf
 #define MWAIT_CSTATE_MASK		0xf
 #define MWAIT_SUBSTATE_SIZE		4
@@ -13,4 +15,42 @@
 
 #define MWAIT_ECX_INTERRUPT_BREAK	0x1
 
+static inline void __monitor(const void *eax, unsigned long ecx,
+			     unsigned long edx)
+{
+	/* "monitor %eax, %ecx, %edx;" */
+	asm volatile(".byte 0x0f, 0x01, 0xc8;"
+		     :: "a" (eax), "c" (ecx), "d"(edx));
+}
+
+static inline void __mwait(unsigned long eax, unsigned long ecx)
+{
+	/* "mwait %eax, %ecx;" */
+	asm volatile(".byte 0x0f, 0x01, 0xc9;"
+		     :: "a" (eax), "c" (ecx));
+}
+
+/*
+ * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
+ * which can obviate IPI to trigger checking of need_resched.
+ * We execute MONITOR against need_resched and enter optimized wait state
+ * through MWAIT. Whenever someone changes need_resched, we would be woken
+ * up from MWAIT (without an IPI).
+ *
+ * New with Core Duo processors, MWAIT can take some hints based on CPU
+ * capability.
+ */
+static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
+{
+	if (!current_set_polling_and_test()) {
+		if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
+			clflush((void *)&current_thread_info()->flags);
+
+		__monitor((void *)&current_thread_info()->flags, 0, 0);
+		if (!need_resched())
+			__mwait(eax, ecx);
+	}
+	__current_clr_polling();
+}
+
 #endif /* _ASM_X86_MWAIT_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 7b034a4057f9..24821f5768bc 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -700,29 +700,6 @@ static inline void sync_core(void)
 #endif
 }
 
-static inline void __monitor(const void *eax, unsigned long ecx,
-			     unsigned long edx)
-{
-	/* "monitor %eax, %ecx, %edx;" */
-	asm volatile(".byte 0x0f, 0x01, 0xc8;"
-		     :: "a" (eax), "c" (ecx), "d"(edx));
-}
-
-static inline void __mwait(unsigned long eax, unsigned long ecx)
-{
-	/* "mwait %eax, %ecx;" */
-	asm volatile(".byte 0x0f, 0x01, 0xc9;"
-		     :: "a" (eax), "c" (ecx));
-}
-
-static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
-{
-	trace_hardirqs_on();
-	/* "mwait %eax, %ecx;" */
-	asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
-		     :: "a" (eax), "c" (ecx));
-}
-
 extern void select_idle_routine(const struct cpuinfo_x86 *c);
 extern void init_amd_e400_c1e_mask(void);
 
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index d2b7f27781bc..e69182fd01cf 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -150,29 +150,6 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
 }
 EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe);
 
-/*
- * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
- * which can obviate IPI to trigger checking of need_resched.
- * We execute MONITOR against need_resched and enter optimized wait state
- * through MWAIT. Whenever someone changes need_resched, we would be woken
- * up from MWAIT (without an IPI).
- *
- * New with Core Duo processors, MWAIT can take some hints based on CPU
- * capability.
- */
-void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
-{
-	if (!need_resched()) {
-		if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
-			clflush((void *)&current_thread_info()->flags);
-
-		__monitor((void *)&current_thread_info()->flags, 0, 0);
-		smp_mb();
-		if (!need_resched())
-			__mwait(ax, cx);
-	}
-}
-
 void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx)
 {
 	unsigned int cpu = smp_processor_id();
diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c
index fc6008fbce35..509452a62f96 100644
--- a/drivers/acpi/acpi_pad.c
+++ b/drivers/acpi/acpi_pad.c
@@ -193,10 +193,7 @@ static int power_saving_thread(void *data)
 					CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
 			stop_critical_timings();
 
-			__monitor((void *)&current_thread_info()->flags, 0, 0);
-			smp_mb();
-			if (!need_resched())
-				__mwait(power_saving_mwait_eax, 1);
+			mwait_idle_with_hints(power_saving_mwait_eax, 1);
 
 			start_critical_timings();
 			if (lapic_marked_unstable)
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 644516d9bde6..f90c56c8379e 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -727,11 +727,6 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev,
 	if (unlikely(!pr))
 		return -EINVAL;
 
-	if (cx->entry_method == ACPI_CSTATE_FFH) {
-		if (current_set_polling_and_test())
-			return -EINVAL;
-	}
-
 	lapic_timer_state_broadcast(pr, cx, 1);
 	acpi_idle_do_entry(cx);
 
@@ -785,11 +780,6 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev,
 	if (unlikely(!pr))
 		return -EINVAL;
 
-	if (cx->entry_method == ACPI_CSTATE_FFH) {
-		if (current_set_polling_and_test())
-			return -EINVAL;
-	}
-
 	/*
 	 * Must be done before busmaster disable as we might need to
 	 * access HPET !
@@ -841,11 +831,6 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev,
 		}
 	}
 
-	if (cx->entry_method == ACPI_CSTATE_FFH) {
-		if (current_set_polling_and_test())
-			return -EINVAL;
-	}
-
 	acpi_unlazy_tlb(smp_processor_id());
 
 	/* Tell the scheduler that we are going deep-idle: */
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index f80b700f821c..efec4055fd5e 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -375,16 +375,7 @@ static int intel_idle(struct cpuidle_device *dev,
 	if (!(lapic_timer_reliable_states & (1 << (cstate))))
 		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
 
-	if (!current_set_polling_and_test()) {
-
-		if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
-			clflush((void *)&current_thread_info()->flags);
-
-		__monitor((void *)&current_thread_info()->flags, 0, 0);
-		smp_mb();
-		if (!need_resched())
-			__mwait(eax, ecx);
-	}
+	mwait_idle_with_hints(eax, ecx);
 
 	if (!(lapic_timer_reliable_states & (1 << (cstate))))
 		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c
index 8f181b3f842b..e8275f2df9af 100644
--- a/drivers/thermal/intel_powerclamp.c
+++ b/drivers/thermal/intel_powerclamp.c
@@ -438,9 +438,7 @@ static int clamp_thread(void *arg)
 			 */
 			local_touch_nmi();
 			stop_critical_timings();
-			__monitor((void *)&current_thread_info()->flags, 0, 0);
-			cpu_relax(); /* allow HT sibling to run */
-			__mwait(eax, ecx);
+			mwait_idle_with_hints(eax, ecx);
 			start_critical_timings();
 			atomic_inc(&idle_wakeup_counter);
 		}
-- 
cgit v1.2.3


From 7e98b71920464b8d15fa95c74366416cd3c88861 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Thu, 19 Dec 2013 11:58:16 -0800
Subject: x86, idle: Use static_cpu_has() for CLFLUSH workaround, add barriers

Use static_cpu_has() to conditionalize the CLFLUSH workaround, and add
memory barriers around it since the documentation is explicit that
CLFLUSH is only ordered with respect to MFENCE.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Len Brown <len.brown@intel.com>
Link: http://lkml.kernel.org/r/CA%2B55aFzGxcML7j8CEvQPYzh0W81uVoAAVmGctMOUZ7CZ1yYd2A@mail.gmail.com
---
 arch/x86/include/asm/mwait.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index 361b02ef128c..19b71c439256 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -43,8 +43,11 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
 static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
 {
 	if (!current_set_polling_and_test()) {
-		if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
+		if (static_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) {
+			mb();
 			clflush((void *)&current_thread_info()->flags);
+			mb();
+		}
 
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
 		if (!need_resched())
-- 
cgit v1.2.3


From 7d590cca7cd2cce4ed7c47d221d6f90566653ba8 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Thu, 19 Dec 2013 12:30:03 -0800
Subject: x86, idle: Add memory barriers around clflush in mwait_play_dead()

For consistency with mwait_idle_with_hints().  Not sure they help, but
they really won't hurt...

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Len Brown <len.brown@intel.com>
Link: http://lkml.kernel.org/r/CA%2B55aFzGxcML7j8CEvQPYzh0W81uVoAAVmGctMOUZ7CZ1yYd2A@mail.gmail.com
---
 arch/x86/kernel/smpboot.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 85dc05a3aa02..f5252c4eec8c 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1417,7 +1417,9 @@ static inline void mwait_play_dead(void)
 		 * The WBINVD is insufficient due to the spurious-wakeup
 		 * case where we return around the loop.
 		 */
+		mb();
 		clflush(mwait_ptr);
+		mb();
 		__monitor(mwait_ptr, 0, 0);
 		mb();
 		__mwait(eax, 0);
-- 
cgit v1.2.3


From 19952a92037e752f9d3bbbad552d596f9a56e146 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 19 Dec 2013 11:35:58 -0800
Subject: stackprotector: Unify the HAVE_CC_STACKPROTECTOR logic between
 architectures

Instead of duplicating the CC_STACKPROTECTOR Kconfig and
Makefile logic in each architecture, switch to using
HAVE_CC_STACKPROTECTOR and keep everything in one place. This
retains the x86-specific bug verification scripts.

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Michal Marek <mmarek@suse.cz>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Shawn Guo <shawn.guo@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-mips@linux-mips.org
Cc: linux-arch@vger.kernel.org
Link: http://lkml.kernel.org/r/1387481759-14535-2-git-send-email-keescook@chromium.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Makefile           | 14 +++++++++++---
 arch/Kconfig       | 22 ++++++++++++++++++++++
 arch/arm/Kconfig   | 13 +------------
 arch/arm/Makefile  |  4 ----
 arch/mips/Kconfig  | 14 +-------------
 arch/mips/Makefile |  4 ----
 arch/sh/Kconfig    | 15 +--------------
 arch/sh/Makefile   |  4 ----
 arch/x86/Kconfig   | 17 +----------------
 arch/x86/Makefile  |  8 +++-----
 10 files changed, 40 insertions(+), 75 deletions(-)

(limited to 'arch')

diff --git a/Makefile b/Makefile
index 858a147fd836..84fb5cd092d2 100644
--- a/Makefile
+++ b/Makefile
@@ -595,10 +595,18 @@ ifneq ($(CONFIG_FRAME_WARN),0)
 KBUILD_CFLAGS += $(call cc-option,-Wframe-larger-than=${CONFIG_FRAME_WARN})
 endif
 
-# Force gcc to behave correct even for buggy distributions
-ifndef CONFIG_CC_STACKPROTECTOR
-KBUILD_CFLAGS += $(call cc-option, -fno-stack-protector)
+# Handle stack protector mode.
+ifdef CONFIG_CC_STACKPROTECTOR
+  stackp-flag := -fstack-protector
+  ifeq ($(call cc-option, $(stackp-flag)),)
+    $(warning Cannot use CONFIG_CC_STACKPROTECTOR: \
+	      -fstack-protector not supported by compiler))
+  endif
+else
+  # Force off for distro compilers that enable stack protector by default.
+  stackp-flag := $(call cc-option, -fno-stack-protector)
 endif
+KBUILD_CFLAGS += $(stackp-flag)
 
 # This warning generated too much noise in a regular build.
 # Use make W=1 to enable this warning (see scripts/Makefile.build)
diff --git a/arch/Kconfig b/arch/Kconfig
index f1cf895c040f..24e026d83072 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -336,6 +336,28 @@ config SECCOMP_FILTER
 
 	  See Documentation/prctl/seccomp_filter.txt for details.
 
+config HAVE_CC_STACKPROTECTOR
+	bool
+	help
+	  An arch should select this symbol if:
+	  - its compiler supports the -fstack-protector option
+	  - it has implemented a stack canary (e.g. __stack_chk_guard)
+
+config CC_STACKPROTECTOR
+	bool "Enable -fstack-protector buffer overflow detection"
+	depends on HAVE_CC_STACKPROTECTOR
+	help
+	  This option turns on the -fstack-protector GCC feature. This
+	  feature puts, at the beginning of functions, a canary value on
+	  the stack just before the return address, and validates
+	  the value just before actually returning.  Stack based buffer
+	  overflows (that need to overwrite this return address) now also
+	  overwrite the canary, which gets detected and the attack is then
+	  neutralized via a kernel panic.
+
+	  This feature requires gcc version 4.2 or above, or a distribution
+	  gcc with the feature backported.
+
 config HAVE_CONTEXT_TRACKING
 	bool
 	help
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index c1f1a7eee953..9c909fc29272 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -30,6 +30,7 @@ config ARM
 	select HAVE_BPF_JIT
 	select HAVE_CONTEXT_TRACKING
 	select HAVE_C_RECORDMCOUNT
+	select HAVE_CC_STACKPROTECTOR
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DMA_API_DEBUG
 	select HAVE_DMA_ATTRS
@@ -1856,18 +1857,6 @@ config SECCOMP
 	  and the task is only allowed to execute a few safe syscalls
 	  defined by each seccomp mode.
 
-config CC_STACKPROTECTOR
-	bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)"
-	help
-	  This option turns on the -fstack-protector GCC feature. This
-	  feature puts, at the beginning of functions, a canary value on
-	  the stack just before the return address, and validates
-	  the value just before actually returning.  Stack based buffer
-	  overflows (that need to overwrite this return address) now also
-	  overwrite the canary, which gets detected and the attack is then
-	  neutralized via a kernel panic.
-	  This feature requires gcc version 4.2 or above.
-
 config SWIOTLB
 	def_bool y
 
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index c99b1086d83d..55b4255ad6ed 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -40,10 +40,6 @@ ifeq ($(CONFIG_FRAME_POINTER),y)
 KBUILD_CFLAGS	+=-fno-omit-frame-pointer -mapcs -mno-sched-prolog
 endif
 
-ifeq ($(CONFIG_CC_STACKPROTECTOR),y)
-KBUILD_CFLAGS	+=-fstack-protector
-endif
-
 ifeq ($(CONFIG_CPU_BIG_ENDIAN),y)
 KBUILD_CPPFLAGS	+= -mbig-endian
 AS		+= -EB
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 650de3976e7a..c93d92beb3d6 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -47,6 +47,7 @@ config MIPS
 	select MODULES_USE_ELF_RELA if MODULES && 64BIT
 	select CLONE_BACKWARDS
 	select HAVE_DEBUG_STACKOVERFLOW
+	select HAVE_CC_STACKPROTECTOR
 
 menu "Machine selection"
 
@@ -2322,19 +2323,6 @@ config SECCOMP
 
 	  If unsure, say Y. Only embedded should say N here.
 
-config CC_STACKPROTECTOR
-	bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)"
-	help
-	  This option turns on the -fstack-protector GCC feature. This
-	  feature puts, at the beginning of functions, a canary value on
-	  the stack just before the return address, and validates
-	  the value just before actually returning.  Stack based buffer
-	  overflows (that need to overwrite this return address) now also
-	  overwrite the canary, which gets detected and the attack is then
-	  neutralized via a kernel panic.
-
-	  This feature requires gcc version 4.2 or above.
-
 config USE_OF
 	bool
 	select OF
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index de300b993607..efe50787cd89 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -232,10 +232,6 @@ bootvars-y	= VMLINUX_LOAD_ADDRESS=$(load-y) \
 
 LDFLAGS			+= -m $(ld-emul)
 
-ifdef CONFIG_CC_STACKPROTECTOR
-  KBUILD_CFLAGS += -fstack-protector
-endif
-
 ifdef CONFIG_MIPS
 CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \
 	egrep -vw '__GNUC_(|MINOR_|PATCHLEVEL_)_' | \
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 9b0979f4df7a..ce298317a73e 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -66,6 +66,7 @@ config SUPERH32
 	select PERF_EVENTS
 	select ARCH_HIBERNATION_POSSIBLE if MMU
 	select SPARSE_IRQ
+	select HAVE_CC_STACKPROTECTOR
 
 config SUPERH64
 	def_bool ARCH = "sh64"
@@ -695,20 +696,6 @@ config SECCOMP
 
 	  If unsure, say N.
 
-config CC_STACKPROTECTOR
-	bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)"
-	depends on SUPERH32
-	help
-	  This option turns on the -fstack-protector GCC feature. This
-	  feature puts, at the beginning of functions, a canary value on
-	  the stack just before the return address, and validates
-	  the value just before actually returning.  Stack based buffer
-	  overflows (that need to overwrite this return address) now also
-	  overwrite the canary, which gets detected and the attack is then
-	  neutralized via a kernel panic.
-
-	  This feature requires gcc version 4.2 or above.
-
 config SMP
 	bool "Symmetric multi-processing support"
 	depends on SYS_SUPPORTS_SMP
diff --git a/arch/sh/Makefile b/arch/sh/Makefile
index aed701c7b11b..d4d16e4be07c 100644
--- a/arch/sh/Makefile
+++ b/arch/sh/Makefile
@@ -199,10 +199,6 @@ ifeq ($(CONFIG_DWARF_UNWINDER),y)
   KBUILD_CFLAGS += -fasynchronous-unwind-tables
 endif
 
-ifeq ($(CONFIG_CC_STACKPROTECTOR),y)
-  KBUILD_CFLAGS += -fstack-protector
-endif
-
 libs-$(CONFIG_SUPERH32)		:= arch/sh/lib/	$(libs-y)
 libs-$(CONFIG_SUPERH64)		:= arch/sh/lib64/ $(libs-y)
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0952ecd60eca..838e7c34dd60 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -125,6 +125,7 @@ config X86
 	select RTC_LIB
 	select HAVE_DEBUG_STACKOVERFLOW
 	select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64
+	select HAVE_CC_STACKPROTECTOR
 
 config INSTRUCTION_DECODER
 	def_bool y
@@ -1617,22 +1618,6 @@ config SECCOMP
 
 	  If unsure, say Y. Only embedded should say N here.
 
-config CC_STACKPROTECTOR
-	bool "Enable -fstack-protector buffer overflow detection"
-	---help---
-	  This option turns on the -fstack-protector GCC feature. This
-	  feature puts, at the beginning of functions, a canary value on
-	  the stack just before the return address, and validates
-	  the value just before actually returning.  Stack based buffer
-	  overflows (that need to overwrite this return address) now also
-	  overwrite the canary, which gets detected and the attack is then
-	  neutralized via a kernel panic.
-
-	  This feature requires gcc version 4.2 or above, or a distribution
-	  gcc with the feature backported. Older versions are automatically
-	  detected and for those versions, this configuration option is
-	  ignored. (and a warning is printed during bootup)
-
 source kernel/Kconfig.hz
 
 config KEXEC
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 57d021507120..13b22e0f681d 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -89,13 +89,11 @@ else
         KBUILD_CFLAGS += -maccumulate-outgoing-args
 endif
 
+# Make sure compiler does not have buggy stack-protector support.
 ifdef CONFIG_CC_STACKPROTECTOR
 	cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh
-        ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(KBUILD_CPPFLAGS) $(biarch)),y)
-                stackp-y := -fstack-protector
-                KBUILD_CFLAGS += $(stackp-y)
-        else
-                $(warning stack protector enabled but no compiler support)
+        ifneq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(KBUILD_CPPFLAGS) $(biarch)),y)
+                $(warning stack-protector enabled but compiler support broken)
         endif
 endif
 
-- 
cgit v1.2.3


From 8779657d29c0ebcc0c94ede4df2f497baf1b563f Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 19 Dec 2013 11:35:59 -0800
Subject: stackprotector: Introduce CONFIG_CC_STACKPROTECTOR_STRONG

This changes the stack protector config option into a choice of
"None", "Regular", and "Strong":

   CONFIG_CC_STACKPROTECTOR_NONE
   CONFIG_CC_STACKPROTECTOR_REGULAR
   CONFIG_CC_STACKPROTECTOR_STRONG

"Regular" means the old CONFIG_CC_STACKPROTECTOR=y option.

"Strong" is a new mode introduced by this patch. With "Strong" the
kernel is built with -fstack-protector-strong (available in
gcc 4.9 and later). This option increases the coverage of the stack
protector without the heavy performance hit of -fstack-protector-all.

For reference, the stack protector options available in gcc are:

-fstack-protector-all:
  Adds the stack-canary saving prefix and stack-canary checking
  suffix to _all_ function entry and exit. Results in substantial
  use of stack space for saving the canary for deep stack users
  (e.g. historically xfs), and measurable (though shockingly still
  low) performance hit due to all the saving/checking. Really not
  suitable for sane systems, and was entirely removed as an option
  from the kernel many years ago.

-fstack-protector:
  Adds the canary save/check to functions that define an 8
  (--param=ssp-buffer-size=N, N=8 by default) or more byte local
  char array. Traditionally, stack overflows happened with
  string-based manipulations, so this was a way to find those
  functions. Very few total functions actually get the canary; no
  measurable performance or size overhead.

-fstack-protector-strong
  Adds the canary for a wider set of functions, since it's not
  just those with strings that have ultimately been vulnerable to
  stack-busting. With this superset, more functions end up with a
  canary, but it still remains small compared to all functions
  with only a small change in performance. Based on the original
  design document, a function gets the canary when it contains any
  of:

    - local variable's address used as part of the right hand side
      of an assignment or function argument
    - local variable is an array (or union containing an array),
      regardless of array type or length
    - uses register local variables

  https://docs.google.com/a/google.com/document/d/1xXBH6rRZue4f296vGt9YQcuLVQHeE516stHwt8M9xyU

Find below a comparison of "size" and "objdump" output when built with
gcc-4.9 in three configurations:

  - defconfig
	11430641 kernel text size
	36110 function bodies

  - defconfig + CONFIG_CC_STACKPROTECTOR_REGULAR
	11468490 kernel text size (+0.33%)
	1015 of 36110 functions are stack-protected (2.81%)

  - defconfig + CONFIG_CC_STACKPROTECTOR_STRONG via this patch
	11692790 kernel text size (+2.24%)
	7401 of 36110 functions are stack-protected (20.5%)

With -strong, ARM's compressed boot code now triggers stack
protection, so a static guard was added. Since this is only used
during decompression and was never used before, the exposure
here is very small. Once it switches to the full kernel, the
stack guard is back to normal.

Chrome OS has been using -fstack-protector-strong for its kernel
builds for the last 8 months with no problems.

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Michal Marek <mmarek@suse.cz>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Shawn Guo <shawn.guo@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-mips@linux-mips.org
Cc: linux-arch@vger.kernel.org
Link: http://lkml.kernel.org/r/1387481759-14535-3-git-send-email-keescook@chromium.org
[ Improved the changelog and descriptions some more. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Makefile                        |  8 ++++++-
 arch/Kconfig                    | 51 ++++++++++++++++++++++++++++++++++++++---
 arch/arm/boot/compressed/misc.c | 14 +++++++++++
 3 files changed, 69 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/Makefile b/Makefile
index 84fb5cd092d2..5271b9623aa3 100644
--- a/Makefile
+++ b/Makefile
@@ -596,12 +596,18 @@ KBUILD_CFLAGS += $(call cc-option,-Wframe-larger-than=${CONFIG_FRAME_WARN})
 endif
 
 # Handle stack protector mode.
-ifdef CONFIG_CC_STACKPROTECTOR
+ifdef CONFIG_CC_STACKPROTECTOR_REGULAR
   stackp-flag := -fstack-protector
   ifeq ($(call cc-option, $(stackp-flag)),)
     $(warning Cannot use CONFIG_CC_STACKPROTECTOR: \
 	      -fstack-protector not supported by compiler))
   endif
+else ifdef CONFIG_CC_STACKPROTECTOR_STRONG
+  stackp-flag := -fstack-protector-strong
+  ifeq ($(call cc-option, $(stackp-flag)),)
+    $(warning Cannot use CONFIG_CC_STACKPROTECTOR_STRONG: \
+	      -fstack-protector-strong not supported by compiler)
+  endif
 else
   # Force off for distro compilers that enable stack protector by default.
   stackp-flag := $(call cc-option, -fno-stack-protector)
diff --git a/arch/Kconfig b/arch/Kconfig
index 24e026d83072..80bbb8ccd0d1 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -344,10 +344,17 @@ config HAVE_CC_STACKPROTECTOR
 	  - it has implemented a stack canary (e.g. __stack_chk_guard)
 
 config CC_STACKPROTECTOR
-	bool "Enable -fstack-protector buffer overflow detection"
+	def_bool n
+	help
+	  Set when a stack-protector mode is enabled, so that the build
+	  can enable kernel-side support for the GCC feature.
+
+choice
+	prompt "Stack Protector buffer overflow detection"
 	depends on HAVE_CC_STACKPROTECTOR
+	default CC_STACKPROTECTOR_NONE
 	help
-	  This option turns on the -fstack-protector GCC feature. This
+	  This option turns on the "stack-protector" GCC feature. This
 	  feature puts, at the beginning of functions, a canary value on
 	  the stack just before the return address, and validates
 	  the value just before actually returning.  Stack based buffer
@@ -355,8 +362,46 @@ config CC_STACKPROTECTOR
 	  overwrite the canary, which gets detected and the attack is then
 	  neutralized via a kernel panic.
 
+config CC_STACKPROTECTOR_NONE
+	bool "None"
+	help
+	  Disable "stack-protector" GCC feature.
+
+config CC_STACKPROTECTOR_REGULAR
+	bool "Regular"
+	select CC_STACKPROTECTOR
+	help
+	  Functions will have the stack-protector canary logic added if they
+	  have an 8-byte or larger character array on the stack.
+
 	  This feature requires gcc version 4.2 or above, or a distribution
-	  gcc with the feature backported.
+	  gcc with the feature backported ("-fstack-protector").
+
+	  On an x86 "defconfig" build, this feature adds canary checks to
+	  about 3% of all kernel functions, which increases kernel code size
+	  by about 0.3%.
+
+config CC_STACKPROTECTOR_STRONG
+	bool "Strong"
+	select CC_STACKPROTECTOR
+	help
+	  Functions will have the stack-protector canary logic added in any
+	  of the following conditions:
+
+	  - local variable's address used as part of the right hand side of an
+	    assignment or function argument
+	  - local variable is an array (or union containing an array),
+	    regardless of array type or length
+	  - uses register local variables
+
+	  This feature requires gcc version 4.9 or above, or a distribution
+	  gcc with the feature backported ("-fstack-protector-strong").
+
+	  On an x86 "defconfig" build, this feature adds canary checks to
+	  about 20% of all kernel functions, which increases the kernel code
+	  size by about 2%.
+
+endchoice
 
 config HAVE_CONTEXT_TRACKING
 	bool
diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c
index 31bd43b82095..d4f891f56996 100644
--- a/arch/arm/boot/compressed/misc.c
+++ b/arch/arm/boot/compressed/misc.c
@@ -127,6 +127,18 @@ asmlinkage void __div0(void)
 	error("Attempting division by 0!");
 }
 
+unsigned long __stack_chk_guard;
+
+void __stack_chk_guard_setup(void)
+{
+	__stack_chk_guard = 0x000a0dff;
+}
+
+void __stack_chk_fail(void)
+{
+	error("stack-protector: Kernel stack is corrupted\n");
+}
+
 extern int do_decompress(u8 *input, int len, u8 *output, void (*error)(char *x));
 
 
@@ -137,6 +149,8 @@ decompress_kernel(unsigned long output_start, unsigned long free_mem_ptr_p,
 {
 	int ret;
 
+	__stack_chk_guard_setup();
+
 	output_data		= (unsigned char *)output_start;
 	free_mem_ptr		= free_mem_ptr_p;
 	free_mem_end_ptr	= free_mem_ptr_end_p;
-- 
cgit v1.2.3


From 11daf32be9a6041a2406c2cbf76b75a9c4a6eaaa Mon Sep 17 00:00:00 2001
From: Matteo Facchinetti <matteo.facchinetti@sirius-es.it>
Date: Fri, 20 Dec 2013 10:16:22 +0100
Subject: powerpc/512x: dts: disable MPC5125 usb module

At the moment the USB controller's pin muxing is not setup
correctly and causes a kernel panic upon system startup, so
disable the USB1 device tree node in the MPC5125 tower board
dts file.

The USB controller is connected to an USB3320 ULPI transceiver
and the device tree should receive an update to reflect correct
dependencies and required initialization data before the USB1
node can get re-enabled.

Signed-off-by: Matteo Facchinetti <matteo.facchinetti@sirius-es.it>
Signed-off-by: Anatolij Gustschin <agust@denx.de>
---
 arch/powerpc/boot/dts/mpc5125twr.dts | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/boot/dts/mpc5125twr.dts b/arch/powerpc/boot/dts/mpc5125twr.dts
index 0a0fe92216ae..a618dfc13e4c 100644
--- a/arch/powerpc/boot/dts/mpc5125twr.dts
+++ b/arch/powerpc/boot/dts/mpc5125twr.dts
@@ -188,6 +188,10 @@
 			reg = <0xA000 0x1000>;
 		};
 
+		// disable USB1 port
+		// TODO:
+		// correct pinmux config and fix USB3320 ulpi dependency
+		// before re-enabling it
 		usb@3000 {
 			compatible = "fsl,mpc5121-usb2-dr";
 			reg = <0x3000 0x400>;
@@ -196,6 +200,7 @@
 			interrupts = <43 0x8>;
 			dr_mode = "host";
 			phy_type = "ulpi";
+			status = "disabled";
 		};
 
 		// 5125 PSCs are not 52xx or 5121 PSC compatible
-- 
cgit v1.2.3


From addccbb264e5e0e5762f4893f6df24afad327c8c Mon Sep 17 00:00:00 2001
From: "Chen, Gong" <gong.chen@linux.intel.com>
Date: Mon, 25 Nov 2013 02:15:00 -0500
Subject: ACPI, APEI, GHES: Do not report only correctable errors with SCI

Currently SCI is employed to handle corrected errors - memory corrected
errors, more specifically but in fact SCI still can be used to handle
any errors, e.g. uncorrected or even fatal ones if enabled by the BIOS.
Enable logging for those kinds of errors too.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Acked-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/1385363701-12387-1-git-send-email-gong.chen@linux.intel.com
[ Boris: massage commit message, rename function arg. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/kernel/cpu/mcheck/mce-apei.c | 14 ++++++++++----
 drivers/acpi/apei/ghes.c              |  3 +--
 2 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c
index de8b60a53f69..a1aef9533154 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-apei.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c
@@ -33,22 +33,28 @@
 #include <linux/acpi.h>
 #include <linux/cper.h>
 #include <acpi/apei.h>
+#include <acpi/ghes.h>
 #include <asm/mce.h>
 
 #include "mce-internal.h"
 
-void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err)
+void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
 {
 	struct mce m;
 
-	/* Only corrected MC is reported */
-	if (!corrected || !(mem_err->validation_bits & CPER_MEM_VALID_PA))
+	if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
 		return;
 
 	mce_setup(&m);
 	m.bank = 1;
-	/* Fake a memory read corrected error with unknown channel */
+	/* Fake a memory read error with unknown channel */
 	m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f;
+
+	if (severity >= GHES_SEV_RECOVERABLE)
+		m.status |= MCI_STATUS_UC;
+	if (severity >= GHES_SEV_PANIC)
+		m.status |= MCI_STATUS_PCC;
+
 	m.addr = mem_err->physical_addr;
 	mce_log(&m);
 	mce_notify_irq();
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index a30bc313787b..ce3683d93a13 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -453,8 +453,7 @@ static void ghes_do_proc(struct ghes *ghes,
 			ghes_edac_report_mem_error(ghes, sev, mem_err);
 
 #ifdef CONFIG_X86_MCE
-			apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED,
-						  mem_err);
+			apei_mce_report_mem_error(sev, mem_err);
 #endif
 			ghes_handle_memory_failure(gdata, sev);
 		}
-- 
cgit v1.2.3


From 2da6e57cce14a1c3b0692d6f877b72e185110e2e Mon Sep 17 00:00:00 2001
From: Dave Young <dyoung@redhat.com>
Date: Fri, 20 Dec 2013 18:02:13 +0800
Subject: x86/efi: Remove unused variables in __map_region()

variables size and end is useless in this function, thus remove them.

Reported-by: Toshi Kani <toshi.kani@hp.com>
Tested-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Dave Young <dyoung@redhat.com>
Acked-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/platform/efi/efi_64.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index bf286c386d33..c5a6491d95da 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -148,15 +148,11 @@ void efi_setup_page_tables(void)
 static void __init __map_region(efi_memory_desc_t *md, u64 va)
 {
 	pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
-	unsigned long pf = 0, size;
-	u64 end;
+	unsigned long pf = 0;
 
 	if (!(md->attribute & EFI_MEMORY_WB))
 		pf |= _PAGE_PCD;
 
-	size = md->num_pages << PAGE_SHIFT;
-	end  = va + size;
-
 	if (kernel_map_pages_in_pgd(pgd, md->phys_addr, va, md->num_pages, pf))
 		pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n",
 			   md->phys_addr, va);
-- 
cgit v1.2.3


From 3b2664964bc886ae9d5127c8d3708b1acc0626d2 Mon Sep 17 00:00:00 2001
From: Dave Young <dyoung@redhat.com>
Date: Fri, 20 Dec 2013 18:02:14 +0800
Subject: x86/efi: Add a wrapper function efi_map_region_fixed()

Kexec kernel will use saved runtime virtual mapping, so add a new
function efi_map_region_fixed() for directly mapping a md to md->virt.

The md is passed in from 1st kernel, the virtual addr is saved in
md->virt_addr.

Signed-off-by: Dave Young <dyoung@redhat.com>
Acked-by: Borislav Petkov <bp@suse.de>
Tested-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/include/asm/efi.h     |  1 +
 arch/x86/platform/efi/efi_32.c |  2 ++
 arch/x86/platform/efi/efi_64.c | 10 ++++++++++
 3 files changed, 13 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 89a05b0507b9..9fbaeb239bde 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -128,6 +128,7 @@ extern void efi_call_phys_epilog(void);
 extern void efi_unmap_memmap(void);
 extern void efi_memory_uc(u64 addr, unsigned long size);
 extern void __init efi_map_region(efi_memory_desc_t *md);
+extern void __init efi_map_region_fixed(efi_memory_desc_t *md);
 extern void efi_sync_low_kernel_mappings(void);
 extern void efi_setup_page_tables(void);
 extern void __init old_map_region(efi_memory_desc_t *md);
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index e94557cf5487..7b3ec6ed99af 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -47,6 +47,8 @@ void __init efi_map_region(efi_memory_desc_t *md)
 	old_map_region(md);
 }
 
+void __init efi_map_region_fixed(efi_memory_desc_t *md) {}
+
 void efi_call_phys_prelog(void)
 {
 	struct desc_ptr gdt_descr;
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index c5a6491d95da..ff08cb19630b 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -199,6 +199,16 @@ void __init efi_map_region(efi_memory_desc_t *md)
 	md->virt_addr = efi_va;
 }
 
+/*
+ * kexec kernel will use efi_map_region_fixed to map efi runtime memory ranges.
+ * md->virt_addr is the original virtual address which had been mapped in kexec
+ * 1st kernel.
+ */
+void __init efi_map_region_fixed(efi_memory_desc_t *md)
+{
+	__map_region(md, md->virt_addr);
+}
+
 void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
 				 u32 type, u64 attribute)
 {
-- 
cgit v1.2.3


From a7f84f03f660d93574ac88835d056c0d6468aebe Mon Sep 17 00:00:00 2001
From: Dave Young <dyoung@redhat.com>
Date: Fri, 20 Dec 2013 18:02:15 +0800
Subject: x86/efi: Fix off-by-one bug in EFI Boot Services reservation

Current code check boot service region with kernel text region by:
start+size >= __pa_symbol(_text)
The end of the above region should be start + size - 1 instead.

I see this problem in ovmf + Fedora 19 grub boot:
text start: 1000000 md start: 800000 md size: 800000

Signed-off-by: Dave Young <dyoung@redhat.com>
Acked-by: Borislav Petkov <bp@suse.de>
Acked-by: Toshi Kani <toshi.kani@hp.com>
Tested-by: Toshi Kani <toshi.kani@hp.com>
Cc: stable@vger.kernel.org
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/platform/efi/efi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index f8ec4dafc74e..15e3b5ea31b5 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -438,7 +438,7 @@ void __init efi_reserve_boot_services(void)
 		 * - Not within any part of the kernel
 		 * - Not the bios reserved area
 		*/
-		if ((start+size >= __pa_symbol(_text)
+		if ((start + size > __pa_symbol(_text)
 				&& start <= __pa_symbol(_end)) ||
 			!e820_all_mapped(start, start+size, E820_RAM) ||
 			memblock_is_region_reserved(start, size)) {
-- 
cgit v1.2.3


From 481f75c043cf44ec11c7fbdbbf37d43463f1e719 Mon Sep 17 00:00:00 2001
From: Dave Young <dyoung@redhat.com>
Date: Fri, 20 Dec 2013 18:02:16 +0800
Subject: x86/efi: Cleanup efi_enter_virtual_mode() function

Add two small functions:
efi_merge_regions() and efi_map_regions(), efi_enter_virtual_mode()
calls them instead of embedding two long for loop.

Signed-off-by: Dave Young <dyoung@redhat.com>
Acked-by: Borislav Petkov <bp@suse.de>
Tested-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/platform/efi/efi.c | 132 ++++++++++++++++++++++++++------------------
 1 file changed, 79 insertions(+), 53 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 15e3b5ea31b5..4694632ef581 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -773,44 +773,12 @@ void __init old_map_region(efi_memory_desc_t *md)
 		       (unsigned long long)md->phys_addr);
 }
 
-/*
- * This function will switch the EFI runtime services to virtual mode.
- * Essentially, we look through the EFI memmap and map every region that
- * has the runtime attribute bit set in its memory descriptor into the
- * ->trampoline_pgd page table using a top-down VA allocation scheme.
- *
- * The old method which used to update that memory descriptor with the
- * virtual address obtained from ioremap() is still supported when the
- * kernel is booted with efi=old_map on its command line. Same old
- * method enabled the runtime services to be called without having to
- * thunk back into physical mode for every invocation.
- *
- * The new method does a pagetable switch in a preemption-safe manner
- * so that we're in a different address space when calling a runtime
- * function. For function arguments passing we do copy the PGDs of the
- * kernel page table into ->trampoline_pgd prior to each call.
- */
-void __init efi_enter_virtual_mode(void)
+/* Merge contiguous regions of the same type and attribute */
+static void __init efi_merge_regions(void)
 {
+	void *p;
 	efi_memory_desc_t *md, *prev_md = NULL;
-	void *p, *new_memmap = NULL;
-	unsigned long size;
-	efi_status_t status;
-	u64 end, systab;
-	int count = 0;
-
-	efi.systab = NULL;
 
-	/*
-	 * We don't do virtual mode, since we don't do runtime services, on
-	 * non-native EFI
-	 */
-	if (!efi_is_native()) {
-		efi_unmap_memmap();
-		return;
-	}
-
-	/* Merge contiguous regions of the same type and attribute */
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
 		u64 prev_size;
 		md = p;
@@ -835,8 +803,31 @@ void __init efi_enter_virtual_mode(void)
 			continue;
 		}
 		prev_md = md;
+	}
+}
+
+static void __init get_systab_virt_addr(efi_memory_desc_t *md)
+{
+	unsigned long size;
+	u64 end, systab;
 
+	size = md->num_pages << EFI_PAGE_SHIFT;
+	end = md->phys_addr + size;
+	systab = (u64)(unsigned long)efi_phys.systab;
+	if (md->phys_addr <= systab && systab < end) {
+		systab += md->virt_addr - md->phys_addr;
+		efi.systab = (efi_system_table_t *)(unsigned long)systab;
 	}
+}
+
+/*
+ * Map efi memory ranges for runtime serivce and update new_memmap with virtual
+ * addresses.
+ */
+static void * __init efi_map_regions(int *count)
+{
+	efi_memory_desc_t *md;
+	void *p, *tmp, *new_memmap = NULL;
 
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
 		md = p;
@@ -849,26 +840,64 @@ void __init efi_enter_virtual_mode(void)
 		}
 
 		efi_map_region(md);
+		get_systab_virt_addr(md);
+
+		tmp = krealloc(new_memmap, (*count + 1) * memmap.desc_size,
+			       GFP_KERNEL);
+		if (!tmp)
+			goto out_krealloc;
+		new_memmap = tmp;
+		memcpy(new_memmap + (*count * memmap.desc_size), md,
+		       memmap.desc_size);
+		(*count)++;
+	}
 
-		size = md->num_pages << EFI_PAGE_SHIFT;
-		end = md->phys_addr + size;
+	return new_memmap;
+out_krealloc:
+	kfree(new_memmap);
+	return NULL;
+}
+
+/*
+ * This function will switch the EFI runtime services to virtual mode.
+ * Essentially, we look through the EFI memmap and map every region that
+ * has the runtime attribute bit set in its memory descriptor into the
+ * ->trampoline_pgd page table using a top-down VA allocation scheme.
+ *
+ * The old method which used to update that memory descriptor with the
+ * virtual address obtained from ioremap() is still supported when the
+ * kernel is booted with efi=old_map on its command line. Same old
+ * method enabled the runtime services to be called without having to
+ * thunk back into physical mode for every invocation.
+ *
+ * The new method does a pagetable switch in a preemption-safe manner
+ * so that we're in a different address space when calling a runtime
+ * function. For function arguments passing we do copy the PGDs of the
+ * kernel page table into ->trampoline_pgd prior to each call.
+ */
+void __init efi_enter_virtual_mode(void)
+{
+	efi_status_t status;
+	void *new_memmap = NULL;
+	int count = 0;
 
-		systab = (u64) (unsigned long) efi_phys.systab;
-		if (md->phys_addr <= systab && systab < end) {
-			systab += md->virt_addr - md->phys_addr;
+	efi.systab = NULL;
 
-			efi.systab = (efi_system_table_t *) (unsigned long) systab;
-		}
+	/*
+	 * We don't do virtual mode, since we don't do runtime services, on
+	 * non-native EFI
+	 */
+	if (!efi_is_native()) {
+		efi_unmap_memmap();
+		return;
+	}
 
-		new_memmap = krealloc(new_memmap,
-				      (count + 1) * memmap.desc_size,
-				      GFP_KERNEL);
-		if (!new_memmap)
-			goto err_out;
+	efi_merge_regions();
 
-		memcpy(new_memmap + (count * memmap.desc_size), md,
-		       memmap.desc_size);
-		count++;
+	new_memmap = efi_map_regions(&count);
+	if (!new_memmap) {
+		pr_err("Error reallocating memory, EFI runtime non-functional!\n");
+		return;
 	}
 
 	BUG_ON(!efi.systab);
@@ -922,9 +951,6 @@ void __init efi_enter_virtual_mode(void)
 			 0, NULL);
 
 	return;
-
- err_out:
-	pr_err("Error reallocating memory, EFI runtime non-functional!\n");
 }
 
 /*
-- 
cgit v1.2.3


From a0998eb15afeffbf52a2c2829318f67df9ac57b8 Mon Sep 17 00:00:00 2001
From: Dave Young <dyoung@redhat.com>
Date: Fri, 20 Dec 2013 18:02:17 +0800
Subject: efi: Export more EFI table variables to sysfs

Export fw_vendor, runtime and config table physical addresses to
/sys/firmware/efi/{fw_vendor,runtime,config_table} because kexec kernels
need them.

From EFI spec these 3 variables will be updated to virtual address after
entering virtual mode. But kernel startup code will need the physical
address.

Signed-off-by: Dave Young <dyoung@redhat.com>
Tested-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 Documentation/ABI/testing/sysfs-firmware-efi | 20 ++++++++++++++
 arch/x86/platform/efi/efi.c                  |  4 +++
 drivers/firmware/efi/efi.c                   | 41 +++++++++++++++++++++++++++-
 include/linux/efi.h                          |  3 ++
 4 files changed, 67 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/ABI/testing/sysfs-firmware-efi

(limited to 'arch')

diff --git a/Documentation/ABI/testing/sysfs-firmware-efi b/Documentation/ABI/testing/sysfs-firmware-efi
new file mode 100644
index 000000000000..05874da7ce80
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-firmware-efi
@@ -0,0 +1,20 @@
+What:		/sys/firmware/efi/fw_vendor
+Date:		December 2013
+Contact:	Dave Young <dyoung@redhat.com>
+Description:	It shows the physical address of firmware vendor field in the
+		EFI system table.
+Users:		Kexec
+
+What:		/sys/firmware/efi/runtime
+Date:		December 2013
+Contact:	Dave Young <dyoung@redhat.com>
+Description:	It shows the physical address of runtime service table entry in
+		the EFI system table.
+Users:		Kexec
+
+What:		/sys/firmware/efi/config_table
+Date:		December 2013
+Contact:	Dave Young <dyoung@redhat.com>
+Description:	It shows the physical address of config table entry in the EFI
+		system table.
+Users:		Kexec
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 4694632ef581..28591072fbb7 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -653,6 +653,10 @@ void __init efi_init(void)
 
 	set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility);
 
+	efi.config_table = (unsigned long)efi.systab->tables;
+	efi.fw_vendor	 = (unsigned long)efi.systab->fw_vendor;
+	efi.runtime	 = (unsigned long)efi.systab->runtime;
+
 	/*
 	 * Show what we know for posterity
 	 */
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 2e2fbdec0845..72533af72b98 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -32,6 +32,9 @@ struct efi __read_mostly efi = {
 	.hcdp       = EFI_INVALID_TABLE_ADDR,
 	.uga        = EFI_INVALID_TABLE_ADDR,
 	.uv_systab  = EFI_INVALID_TABLE_ADDR,
+	.fw_vendor  = EFI_INVALID_TABLE_ADDR,
+	.runtime    = EFI_INVALID_TABLE_ADDR,
+	.config_table  = EFI_INVALID_TABLE_ADDR,
 };
 EXPORT_SYMBOL(efi);
 
@@ -71,13 +74,49 @@ static ssize_t systab_show(struct kobject *kobj,
 static struct kobj_attribute efi_attr_systab =
 			__ATTR(systab, 0400, systab_show, NULL);
 
+#define EFI_FIELD(var) efi.var
+
+#define EFI_ATTR_SHOW(name) \
+static ssize_t name##_show(struct kobject *kobj, \
+				struct kobj_attribute *attr, char *buf) \
+{ \
+	return sprintf(buf, "0x%lx\n", EFI_FIELD(name)); \
+}
+
+EFI_ATTR_SHOW(fw_vendor);
+EFI_ATTR_SHOW(runtime);
+EFI_ATTR_SHOW(config_table);
+
+static struct kobj_attribute efi_attr_fw_vendor = __ATTR_RO(fw_vendor);
+static struct kobj_attribute efi_attr_runtime = __ATTR_RO(runtime);
+static struct kobj_attribute efi_attr_config_table = __ATTR_RO(config_table);
+
 static struct attribute *efi_subsys_attrs[] = {
 	&efi_attr_systab.attr,
-	NULL,	/* maybe more in the future? */
+	&efi_attr_fw_vendor.attr,
+	&efi_attr_runtime.attr,
+	&efi_attr_config_table.attr,
+	NULL,
 };
 
+static umode_t efi_attr_is_visible(struct kobject *kobj,
+				   struct attribute *attr, int n)
+{
+	umode_t mode = attr->mode;
+
+	if (attr == &efi_attr_fw_vendor.attr)
+		return (efi.fw_vendor == EFI_INVALID_TABLE_ADDR) ? 0 : mode;
+	else if (attr == &efi_attr_runtime.attr)
+		return (efi.runtime == EFI_INVALID_TABLE_ADDR) ? 0 : mode;
+	else if (attr == &efi_attr_config_table.attr)
+		return (efi.config_table == EFI_INVALID_TABLE_ADDR) ? 0 : mode;
+
+	return mode;
+}
+
 static struct attribute_group efi_subsys_attr_group = {
 	.attrs = efi_subsys_attrs,
+	.is_visible = efi_attr_is_visible,
 };
 
 static struct efivars generic_efivars;
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 6c0ca528300c..fb60b10b7bd9 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -556,6 +556,9 @@ extern struct efi {
 	unsigned long hcdp;		/* HCDP table */
 	unsigned long uga;		/* UGA table */
 	unsigned long uv_systab;	/* UV system table */
+	unsigned long fw_vendor;	/* fw_vendor */
+	unsigned long runtime;		/* runtime table */
+	unsigned long config_table;	/* config tables */
 	efi_get_time_t *get_time;
 	efi_set_time_t *set_time;
 	efi_get_wakeup_time_t *get_wakeup_time;
-- 
cgit v1.2.3


From 926172d46038d7610b6b8d84e40db727cefb482d Mon Sep 17 00:00:00 2001
From: Dave Young <dyoung@redhat.com>
Date: Fri, 20 Dec 2013 18:02:18 +0800
Subject: efi: Export EFI runtime memory mapping to sysfs

kexec kernel will need exactly same mapping for EFI runtime memory
ranges. Thus here export the runtime ranges mapping to sysfs,
kexec-tools will assemble them and pass to 2nd kernel via setup_data.

Introducing a new directory /sys/firmware/efi/runtime-map just like
/sys/firmware/memmap. Containing below attribute in each file of that
directory:

attribute  num_pages  phys_addr  type  virt_addr

Signed-off-by: Dave Young <dyoung@redhat.com>
Tested-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 .../ABI/testing/sysfs-firmware-efi-runtime-map     |  34 ++++
 arch/x86/platform/efi/efi.c                        |  46 +++++-
 drivers/firmware/efi/Kconfig                       |  11 ++
 drivers/firmware/efi/Makefile                      |   1 +
 drivers/firmware/efi/efi.c                         |   4 +
 drivers/firmware/efi/runtime-map.c                 | 181 +++++++++++++++++++++
 include/linux/efi.h                                |  13 ++
 7 files changed, 287 insertions(+), 3 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-firmware-efi-runtime-map
 create mode 100644 drivers/firmware/efi/runtime-map.c

(limited to 'arch')

diff --git a/Documentation/ABI/testing/sysfs-firmware-efi-runtime-map b/Documentation/ABI/testing/sysfs-firmware-efi-runtime-map
new file mode 100644
index 000000000000..c61b9b348e99
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-firmware-efi-runtime-map
@@ -0,0 +1,34 @@
+What:		/sys/firmware/efi/runtime-map/
+Date:		December 2013
+Contact:	Dave Young <dyoung@redhat.com>
+Description:	Switching efi runtime services to virtual mode requires
+		that all efi memory ranges which have the runtime attribute
+		bit set to be mapped to virtual addresses.
+
+		The efi runtime services can only be switched to virtual
+		mode once without rebooting. The kexec kernel must maintain
+		the same physical to virtual address mappings as the first
+		kernel. The mappings are exported to sysfs so userspace tools
+		can reassemble them and pass them into the kexec kernel.
+
+		/sys/firmware/efi/runtime-map/ is the directory the kernel
+		exports that information in.
+
+		subdirectories are named with the number of the memory range:
+
+			/sys/firmware/efi/runtime-map/0
+			/sys/firmware/efi/runtime-map/1
+			/sys/firmware/efi/runtime-map/2
+			/sys/firmware/efi/runtime-map/3
+			...
+
+		Each subdirectory contains five files:
+
+		attribute : The attributes of the memory range.
+		num_pages : The size of the memory range in pages.
+		phys_addr : The physical address of the memory range.
+		type      : The type of the memory range.
+		virt_addr : The virtual address of the memory range.
+
+		Above values are all hexadecimal numbers with the '0x' prefix.
+Users:		Kexec
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 28591072fbb7..74fe7a719508 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -76,6 +76,9 @@ static __initdata efi_config_table_type_t arch_tables[] = {
 	{NULL_GUID, NULL, NULL},
 };
 
+static void *efi_runtime_map;
+static int nr_efi_runtime_map;
+
 /*
  * Returns 1 if 'facility' is enabled, 0 otherwise.
  */
@@ -824,6 +827,39 @@ static void __init get_systab_virt_addr(efi_memory_desc_t *md)
 	}
 }
 
+static int __init save_runtime_map(void)
+{
+	efi_memory_desc_t *md;
+	void *tmp, *p, *q = NULL;
+	int count = 0;
+
+	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+		md = p;
+
+		if (!(md->attribute & EFI_MEMORY_RUNTIME) ||
+		    (md->type == EFI_BOOT_SERVICES_CODE) ||
+		    (md->type == EFI_BOOT_SERVICES_DATA))
+			continue;
+		tmp = krealloc(q, (count + 1) * memmap.desc_size, GFP_KERNEL);
+		if (!tmp)
+			goto out;
+		q = tmp;
+
+		memcpy(q + count * memmap.desc_size, md, memmap.desc_size);
+		count++;
+	}
+
+	efi_runtime_map = q;
+	nr_efi_runtime_map = count;
+	efi_runtime_map_setup(efi_runtime_map, nr_efi_runtime_map,
+			      boot_params.efi_info.efi_memdesc_size);
+
+	return 0;
+out:
+	kfree(q);
+	return -ENOMEM;
+}
+
 /*
  * Map efi memory ranges for runtime serivce and update new_memmap with virtual
  * addresses.
@@ -849,7 +885,7 @@ static void * __init efi_map_regions(int *count)
 		tmp = krealloc(new_memmap, (*count + 1) * memmap.desc_size,
 			       GFP_KERNEL);
 		if (!tmp)
-			goto out_krealloc;
+			goto out;
 		new_memmap = tmp;
 		memcpy(new_memmap + (*count * memmap.desc_size), md,
 		       memmap.desc_size);
@@ -857,7 +893,7 @@ static void * __init efi_map_regions(int *count)
 	}
 
 	return new_memmap;
-out_krealloc:
+out:
 	kfree(new_memmap);
 	return NULL;
 }
@@ -883,7 +919,7 @@ void __init efi_enter_virtual_mode(void)
 {
 	efi_status_t status;
 	void *new_memmap = NULL;
-	int count = 0;
+	int err, count = 0;
 
 	efi.systab = NULL;
 
@@ -904,6 +940,10 @@ void __init efi_enter_virtual_mode(void)
 		return;
 	}
 
+	err = save_runtime_map();
+	if (err)
+		pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n");
+
 	BUG_ON(!efi.systab);
 
 	efi_setup_page_tables();
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index 3150aa4874e8..730f5f2e8b7f 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -39,4 +39,15 @@ config EFI_VARS_PSTORE_DEFAULT_DISABLE
 config UEFI_CPER
 	def_bool n
 
+config EFI_RUNTIME_MAP
+	bool "Export efi runtime maps to sysfs"
+	depends on X86 && EFI && KEXEC
+	default y
+	help
+	  Export efi runtime memory maps to /sys/firmware/efi/runtime-map.
+	  That memory map is used for example by kexec to set up efi virtual
+	  mapping the 2nd kernel, but can also be used for debugging purposes.
+
+	  See also Documentation/ABI/testing/sysfs-firmware-efi-runtime-map.
+
 endmenu
diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile
index 9ba156d3c775..a58e0f183a08 100644
--- a/drivers/firmware/efi/Makefile
+++ b/drivers/firmware/efi/Makefile
@@ -5,3 +5,4 @@ obj-y					+= efi.o vars.o
 obj-$(CONFIG_EFI_VARS)			+= efivars.o
 obj-$(CONFIG_EFI_VARS_PSTORE)		+= efi-pstore.o
 obj-$(CONFIG_UEFI_CPER)			+= cper.o
+obj-$(CONFIG_EFI_RUNTIME_MAP)		+= runtime-map.o
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 72533af72b98..4753bac65279 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -167,6 +167,10 @@ static int __init efisubsys_init(void)
 		goto err_unregister;
 	}
 
+	error = efi_runtime_map_init(efi_kobj);
+	if (error)
+		goto err_remove_group;
+
 	/* and the standard mountpoint for efivarfs */
 	efivars_kobj = kobject_create_and_add("efivars", efi_kobj);
 	if (!efivars_kobj) {
diff --git a/drivers/firmware/efi/runtime-map.c b/drivers/firmware/efi/runtime-map.c
new file mode 100644
index 000000000000..97cdd16a2169
--- /dev/null
+++ b/drivers/firmware/efi/runtime-map.c
@@ -0,0 +1,181 @@
+/*
+ * linux/drivers/efi/runtime-map.c
+ * Copyright (C) 2013 Red Hat, Inc., Dave Young <dyoung@redhat.com>
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/efi.h>
+#include <linux/slab.h>
+
+#include <asm/setup.h>
+
+static void *efi_runtime_map;
+static int nr_efi_runtime_map;
+static u32 efi_memdesc_size;
+
+struct efi_runtime_map_entry {
+	efi_memory_desc_t md;
+	struct kobject kobj;   /* kobject for each entry */
+};
+
+static struct efi_runtime_map_entry **map_entries;
+
+struct map_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct efi_runtime_map_entry *entry, char *buf);
+};
+
+static inline struct map_attribute *to_map_attr(struct attribute *attr)
+{
+	return container_of(attr, struct map_attribute, attr);
+}
+
+static ssize_t type_show(struct efi_runtime_map_entry *entry, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "0x%x\n", entry->md.type);
+}
+
+#define EFI_RUNTIME_FIELD(var) entry->md.var
+
+#define EFI_RUNTIME_U64_ATTR_SHOW(name) \
+static ssize_t name##_show(struct efi_runtime_map_entry *entry, char *buf) \
+{ \
+	return snprintf(buf, PAGE_SIZE, "0x%llx\n", EFI_RUNTIME_FIELD(name)); \
+}
+
+EFI_RUNTIME_U64_ATTR_SHOW(phys_addr);
+EFI_RUNTIME_U64_ATTR_SHOW(virt_addr);
+EFI_RUNTIME_U64_ATTR_SHOW(num_pages);
+EFI_RUNTIME_U64_ATTR_SHOW(attribute);
+
+static inline struct efi_runtime_map_entry *to_map_entry(struct kobject *kobj)
+{
+	return container_of(kobj, struct efi_runtime_map_entry, kobj);
+}
+
+static ssize_t map_attr_show(struct kobject *kobj, struct attribute *attr,
+			      char *buf)
+{
+	struct efi_runtime_map_entry *entry = to_map_entry(kobj);
+	struct map_attribute *map_attr = to_map_attr(attr);
+
+	return map_attr->show(entry, buf);
+}
+
+static struct map_attribute map_type_attr = __ATTR_RO(type);
+static struct map_attribute map_phys_addr_attr   = __ATTR_RO(phys_addr);
+static struct map_attribute map_virt_addr_attr  = __ATTR_RO(virt_addr);
+static struct map_attribute map_num_pages_attr  = __ATTR_RO(num_pages);
+static struct map_attribute map_attribute_attr  = __ATTR_RO(attribute);
+
+/*
+ * These are default attributes that are added for every memmap entry.
+ */
+static struct attribute *def_attrs[] = {
+	&map_type_attr.attr,
+	&map_phys_addr_attr.attr,
+	&map_virt_addr_attr.attr,
+	&map_num_pages_attr.attr,
+	&map_attribute_attr.attr,
+	NULL
+};
+
+static const struct sysfs_ops map_attr_ops = {
+	.show = map_attr_show,
+};
+
+static void map_release(struct kobject *kobj)
+{
+	struct efi_runtime_map_entry *entry;
+
+	entry = to_map_entry(kobj);
+	kfree(entry);
+}
+
+static struct kobj_type __refdata map_ktype = {
+	.sysfs_ops	= &map_attr_ops,
+	.default_attrs	= def_attrs,
+	.release	= map_release,
+};
+
+static struct kset *map_kset;
+
+static struct efi_runtime_map_entry *
+add_sysfs_runtime_map_entry(struct kobject *kobj, int nr)
+{
+	int ret;
+	struct efi_runtime_map_entry *entry;
+
+	if (!map_kset) {
+		map_kset = kset_create_and_add("runtime-map", NULL, kobj);
+		if (!map_kset)
+			return ERR_PTR(-ENOMEM);
+	}
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry) {
+		kset_unregister(map_kset);
+		return entry;
+	}
+
+	memcpy(&entry->md, efi_runtime_map + nr * efi_memdesc_size,
+	       sizeof(efi_memory_desc_t));
+
+	kobject_init(&entry->kobj, &map_ktype);
+	entry->kobj.kset = map_kset;
+	ret = kobject_add(&entry->kobj, NULL, "%d", nr);
+	if (ret) {
+		kobject_put(&entry->kobj);
+		kset_unregister(map_kset);
+		return ERR_PTR(ret);
+	}
+
+	return entry;
+}
+
+void efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size)
+{
+	efi_runtime_map = map;
+	nr_efi_runtime_map = nr_entries;
+	efi_memdesc_size = desc_size;
+}
+
+int __init efi_runtime_map_init(struct kobject *efi_kobj)
+{
+	int i, j, ret = 0;
+	struct efi_runtime_map_entry *entry;
+
+	if (!efi_runtime_map)
+		return 0;
+
+	map_entries = kzalloc(nr_efi_runtime_map * sizeof(entry), GFP_KERNEL);
+	if (!map_entries) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	for (i = 0; i < nr_efi_runtime_map; i++) {
+		entry = add_sysfs_runtime_map_entry(efi_kobj, i);
+		if (IS_ERR(entry)) {
+			ret = PTR_ERR(entry);
+			goto out_add_entry;
+		}
+		*(map_entries + i) = entry;
+	}
+
+	return 0;
+out_add_entry:
+	for (j = i - 1; j > 0; j--) {
+		entry = *(map_entries + j);
+		kobject_put(&entry->kobj);
+	}
+	if (map_kset)
+		kset_unregister(map_kset);
+out:
+	return ret;
+}
diff --git a/include/linux/efi.h b/include/linux/efi.h
index fb60b10b7bd9..e64540746c63 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -872,4 +872,17 @@ int efivars_sysfs_init(void);
 
 #endif /* CONFIG_EFI_VARS */
 
+#ifdef CONFIG_EFI_RUNTIME_MAP
+int efi_runtime_map_init(struct kobject *);
+void efi_runtime_map_setup(void *, int, u32);
+#else
+static inline int efi_runtime_map_init(struct kobject *kobj)
+{
+	return 0;
+}
+
+static inline void
+efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size) {}
+#endif
+
 #endif /* _LINUX_EFI_H */
-- 
cgit v1.2.3


From dcd740b645003b866d7eb30d13d34d0729cce9db Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Mon, 16 Dec 2013 19:16:07 +0100
Subject: ARM: shmobile: armadillo: Fix coherent DMA mask

Commit 4dcfa60071b3d23f0181f27d8519f12e37cefbb9 ("ARM: DMA-API: better
handing of DMA masks for coherent allocations") added an additional
check to the coherent DMA mask that results in an error when the mask is
larger than what dma_addr_t can address.

Set the LCDC coherent DMA mask to DMA_BIT_MASK(32) instead of ~0 to fix
the problem.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 arch/arm/mach-shmobile/board-armadillo800eva.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-shmobile/board-armadillo800eva.c b/arch/arm/mach-shmobile/board-armadillo800eva.c
index c18689123023..8ea87bd45c33 100644
--- a/arch/arm/mach-shmobile/board-armadillo800eva.c
+++ b/arch/arm/mach-shmobile/board-armadillo800eva.c
@@ -483,7 +483,7 @@ static struct platform_device lcdc0_device = {
 	.id		= 0,
 	.dev	= {
 		.platform_data	= &lcdc0_info,
-		.coherent_dma_mask = ~0,
+		.coherent_dma_mask = DMA_BIT_MASK(32),
 	},
 };
 
@@ -580,7 +580,7 @@ static struct platform_device hdmi_lcdc_device = {
 	.id		= 1,
 	.dev	= {
 		.platform_data	= &hdmi_lcdc_info,
-		.coherent_dma_mask = ~0,
+		.coherent_dma_mask = DMA_BIT_MASK(32),
 	},
 };
 
-- 
cgit v1.2.3


From 4f387323853c495ac589210832fad4503f75a0e7 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Mon, 16 Dec 2013 19:16:08 +0100
Subject: ARM: shmobile: kzm9g: Fix coherent DMA mask

Commit 4dcfa60071b3d23f0181f27d8519f12e37cefbb9 ("ARM: DMA-API: better
handing of DMA masks for coherent allocations") added an additional
check to the coherent DMA mask that results in an error when the mask is
larger than what dma_addr_t can address.

Set the LCDC coherent DMA mask to DMA_BIT_MASK(32) instead of ~0 to fix
the problem.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 arch/arm/mach-shmobile/board-kzm9g.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/mach-shmobile/board-kzm9g.c b/arch/arm/mach-shmobile/board-kzm9g.c
index fe689b7fdc9e..bc40b853ffd3 100644
--- a/arch/arm/mach-shmobile/board-kzm9g.c
+++ b/arch/arm/mach-shmobile/board-kzm9g.c
@@ -334,7 +334,7 @@ static struct platform_device lcdc_device = {
 	.resource	= lcdc_resources,
 	.dev	= {
 		.platform_data	= &lcdc_info,
-		.coherent_dma_mask = ~0,
+		.coherent_dma_mask = DMA_BIT_MASK(32),
 	},
 };
 
-- 
cgit v1.2.3


From b6328a6b7ba57fc84c38248f6f0e387e1170f1a8 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Mon, 16 Dec 2013 19:16:09 +0100
Subject: ARM: shmobile: mackerel: Fix coherent DMA mask

Commit 4dcfa60071b3d23f0181f27d8519f12e37cefbb9 ("ARM: DMA-API: better
handing of DMA masks for coherent allocations") added an additional
check to the coherent DMA mask that results in an error when the mask is
larger than what dma_addr_t can address.

Set the LCDC coherent DMA mask to DMA_BIT_MASK(32) instead of ~0 to fix
the problem.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 arch/arm/mach-shmobile/board-mackerel.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c
index af06753eb809..e721d2ccceae 100644
--- a/arch/arm/mach-shmobile/board-mackerel.c
+++ b/arch/arm/mach-shmobile/board-mackerel.c
@@ -409,7 +409,7 @@ static struct platform_device lcdc_device = {
 	.resource	= lcdc_resources,
 	.dev	= {
 		.platform_data	= &lcdc_info,
-		.coherent_dma_mask = ~0,
+		.coherent_dma_mask = DMA_BIT_MASK(32),
 	},
 };
 
@@ -499,7 +499,7 @@ static struct platform_device hdmi_lcdc_device = {
 	.id		= 1,
 	.dev	= {
 		.platform_data	= &hdmi_lcdc_info,
-		.coherent_dma_mask = ~0,
+		.coherent_dma_mask = DMA_BIT_MASK(32),
 	},
 };
 
-- 
cgit v1.2.3


From c5fe5d80680e2949ffe102180f5fc6cefc0d145f Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 27 Dec 2013 15:30:58 -0800
Subject: x86: Replace assembly access_ok() with a C variant

It turns out that the assembly variant doesn't actually produce that
good code, presumably partly because it creates a long dependency
chain with no scheduling, and partly because we cannot get a flags
result out of gcc (which could be fixed with asm goto, but it turns
out not to be worth it.)

The C code allows gcc to schedule and generate multiple (easily
predictable) branches, and as a side benefit we can really optimize
the case where the size is constant.

Link: http://lkml.kernel.org/r/CA%2B55aFzPBdbfKovMT8Edr4SmE2_=%2BOKJFac9XW2awegogTkVTA@mail.gmail.com
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/uaccess.h | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 8ec57c07b125..84ecf1df2ac6 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -40,22 +40,28 @@
 /*
  * Test whether a block of memory is a valid user space address.
  * Returns 0 if the range is valid, nonzero otherwise.
- *
- * This is equivalent to the following test:
- * (u33)addr + (u33)size > (u33)current->addr_limit.seg (u65 for x86_64)
- *
- * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry...
  */
+static inline int __chk_range_not_ok(unsigned long addr, unsigned long size, unsigned long limit)
+{
+	/*
+	 * If we have used "sizeof()" for the size,
+	 * we know it won't overflow the limit (but
+	 * it might overflow the 'addr', so it's
+	 * important to subtract the size from the
+	 * limit, not add it to the address).
+	 */
+	if (__builtin_constant_p(size))
+		return addr > limit - size;
+
+	/* Arbitrary sizes? Be careful about overflow */
+	addr += size;
+	return (addr < size) || (addr > limit);
+}
 
 #define __range_not_ok(addr, size, limit)				\
 ({									\
-	unsigned long flag, roksum;					\
 	__chk_user_ptr(addr);						\
-	asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0"		\
-	    : "=&r" (flag), "=r" (roksum)				\
-	    : "1" (addr), "g" ((long)(size)),				\
-	      "rm" (limit));						\
-	flag;								\
+	__chk_range_not_ok((unsigned long __force)(addr), size, limit); \
 })
 
 /**
-- 
cgit v1.2.3


From a740576a4abf933de8f50787f24f24456cebd761 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Fri, 27 Dec 2013 16:52:47 -0800
Subject: x86: Slightly tweak the access_ok() C variant for better code

gcc can under very specific circumstances realize that the code
sequence:

	foo += bar;
	if (foo < bar) ...

... is equivalent to a carry out from the addition.  Tweak the
implementation of access_ok() (specifically __chk_range_not_ok()) to
make it more likely that gcc will make that connection.  It isn't
fool-proof (sometimes gcc seems to think it can make better code with
lea, and ends up with a second comparison), still, but it seems to be
able to connect the two more frequently this way.

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/CA%2B55aFzPBdbfKovMT8Edr4SmE2_=%2BOKJFac9XW2awegogTkVTA@mail.gmail.com
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/uaccess.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 84ecf1df2ac6..6f1bb74d547b 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -41,7 +41,7 @@
  * Test whether a block of memory is a valid user space address.
  * Returns 0 if the range is valid, nonzero otherwise.
  */
-static inline int __chk_range_not_ok(unsigned long addr, unsigned long size, unsigned long limit)
+static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, unsigned long limit)
 {
 	/*
 	 * If we have used "sizeof()" for the size,
@@ -55,7 +55,9 @@ static inline int __chk_range_not_ok(unsigned long addr, unsigned long size, uns
 
 	/* Arbitrary sizes? Be careful about overflow */
 	addr += size;
-	return (addr < size) || (addr > limit);
+	if (addr < size)
+		return true;
+	return addr > limit;
 }
 
 #define __range_not_ok(addr, size, limit)				\
@@ -84,7 +86,7 @@ static inline int __chk_range_not_ok(unsigned long addr, unsigned long size, uns
  * this function, memory access functions may still return -EFAULT.
  */
 #define access_ok(type, addr, size) \
-	(likely(__range_not_ok(addr, size, user_addr_max()) == 0))
+	likely(!__range_not_ok(addr, size, user_addr_max()))
 
 /*
  * The exception table consists of pairs of addresses relative to the
-- 
cgit v1.2.3


From 4ff859fe1dc0da0f87bbdfff78f527898878fa4a Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sun, 29 Dec 2013 12:39:50 +0000
Subject: ARM: fix footbridge clockevent device

The clockevents code was being told that the footbridge clock event
device ticks at 16x the rate which it actually does.  This leads to
timekeeping problems since it allows the clocksource to wrap before
the kernel notices.  Fix this by using the correct clock.

Fixes: 4e8d76373c9fd ("ARM: footbridge: convert to clockevents/clocksource")
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: <stable@vger.kernel.org>
---
 arch/arm/mach-footbridge/dc21285-timer.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-footbridge/dc21285-timer.c b/arch/arm/mach-footbridge/dc21285-timer.c
index 9ee78f7b4990..782f6c71fa0a 100644
--- a/arch/arm/mach-footbridge/dc21285-timer.c
+++ b/arch/arm/mach-footbridge/dc21285-timer.c
@@ -96,11 +96,12 @@ static struct irqaction footbridge_timer_irq = {
 void __init footbridge_timer_init(void)
 {
 	struct clock_event_device *ce = &ckevt_dc21285;
+	unsigned rate = DIV_ROUND_CLOSEST(mem_fclk_21285, 16);
 
-	clocksource_register_hz(&cksrc_dc21285, (mem_fclk_21285 + 8) / 16);
+	clocksource_register_hz(&cksrc_dc21285, rate);
 
 	setup_irq(ce->irq, &footbridge_timer_irq);
 
 	ce->cpumask = cpumask_of(smp_processor_id());
-	clockevents_config_and_register(ce, mem_fclk_21285, 0x4, 0xffffff);
+	clockevents_config_and_register(ce, rate, 0x4, 0xffffff);
 }
-- 
cgit v1.2.3


From 2a7cfcbc0553365d75716f69ee7b704cac7c9248 Mon Sep 17 00:00:00 2001
From: Steven Capper <steve.capper@linaro.org>
Date: Mon, 16 Dec 2013 17:25:52 +0100
Subject: ARM: 7923/1: mm: fix dcache flush logic for compound high pages

When given a compound high page, __flush_dcache_page will only flush
the first page of the compound page repeatedly rather than the entire
set of constituent pages.

This error was introduced by:
   0b19f93 ARM: mm: Add support for flushing HugeTLB pages.

This patch corrects the logic such that all constituent pages are now
flushed.

Cc: stable@vger.kernel.org # 3.10+
Signed-off-by: Steve Capper <steve.capper@linaro.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/flush.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 6d5ba9afb16a..3387e60e4ea3 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -175,16 +175,16 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page)
 		unsigned long i;
 		if (cache_is_vipt_nonaliasing()) {
 			for (i = 0; i < (1 << compound_order(page)); i++) {
-				void *addr = kmap_atomic(page);
+				void *addr = kmap_atomic(page + i);
 				__cpuc_flush_dcache_area(addr, PAGE_SIZE);
 				kunmap_atomic(addr);
 			}
 		} else {
 			for (i = 0; i < (1 << compound_order(page)); i++) {
-				void *addr = kmap_high_get(page);
+				void *addr = kmap_high_get(page + i);
 				if (addr) {
 					__cpuc_flush_dcache_area(addr, PAGE_SIZE);
-					kunmap_high(page);
+					kunmap_high(page + i);
 				}
 			}
 		}
-- 
cgit v1.2.3


From efea3403d4b7c6d1dd5d5ac3234c161e8b314d66 Mon Sep 17 00:00:00 2001
From: Laura Abbott <lauraa@codeaurora.org>
Date: Sat, 21 Dec 2013 01:03:06 +0100
Subject: ARM: 7931/1: Correct virt_addr_valid

The definition of virt_addr_valid is that virt_addr_valid should
return true if and only if virt_to_page returns a valid pointer.
The current definition of virt_addr_valid only checks against the
virtual address range. There's no guarantee that just because a
virtual address falls bewteen PAGE_OFFSET and high_memory the
associated physical memory has a valid backing struct page. Follow
the example of other architectures and convert to pfn_valid to
verify that the virtual address is actually valid. The check for
an address between PAGE_OFFSET and high_memory is still necessary
as vmalloc/highmem addresses are not valid with virt_to_page.

Cc: Will Deacon <will.deacon@arm.com>
Cc: Nicolas Pitre <nico@linaro.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/memory.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 6976b03e5213..8756e4bcdba0 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -347,7 +347,8 @@ static inline __deprecated void *bus_to_virt(unsigned long x)
 #define ARCH_PFN_OFFSET		PHYS_PFN_OFFSET
 
 #define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
-#define virt_addr_valid(kaddr)	((unsigned long)(kaddr) >= PAGE_OFFSET && (unsigned long)(kaddr) < (unsigned long)high_memory)
+#define virt_addr_valid(kaddr)	(((unsigned long)(kaddr) >= PAGE_OFFSET && (unsigned long)(kaddr) < (unsigned long)high_memory) \
+					&& pfn_valid(__pa(kaddr) >> PAGE_SHIFT) )
 
 #endif
 
-- 
cgit v1.2.3


From 1fec0533693cd74f2d1a46edd29449cfee429df0 Mon Sep 17 00:00:00 2001
From: Dave Young <dyoung@redhat.com>
Date: Fri, 20 Dec 2013 18:02:19 +0800
Subject: x86/efi: Pass necessary EFI data for kexec via setup_data

Add a new setup_data type SETUP_EFI for kexec use.  Passing the saved
fw_vendor, runtime, config tables and EFI runtime mappings.

When entering virtual mode, directly mapping the EFI runtime regions
which we passed in previously. And skip the step to call
SetVirtualAddressMap().

Specially for HP z420 workstation we need save the smbios physical
address.  The kernel boot sequence proceeds in the following order.
Step 2 requires efi.smbios to be the physical address.  However, I found
that on HP z420 EFI system table has a virtual address of SMBIOS in step
1.  Hence, we need set it back to the physical address with the smbios
in efi_setup_data.  (When it is still the physical address, it simply
sets the same value.)

1. efi_init() - Set efi.smbios from EFI system table
2. dmi_scan_machine() - Temporary map efi.smbios to access SMBIOS table
3. efi_enter_virtual_mode() - Map EFI ranges

Tested on ovmf+qemu, lenovo thinkpad, a dell laptop and an
HP z420 workstation.

Signed-off-by: Dave Young <dyoung@redhat.com>
Tested-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/include/asm/efi.h            |  12 +++
 arch/x86/include/uapi/asm/bootparam.h |   1 +
 arch/x86/kernel/setup.c               |   3 +
 arch/x86/platform/efi/efi.c           | 158 ++++++++++++++++++++++++++++------
 arch/x86/platform/efi/efi_32.c        |   1 +
 arch/x86/platform/efi/efi_64.c        |   6 ++
 6 files changed, 156 insertions(+), 25 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 9fbaeb239bde..4d1ba80b6ff1 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -133,6 +133,18 @@ extern void efi_sync_low_kernel_mappings(void);
 extern void efi_setup_page_tables(void);
 extern void __init old_map_region(efi_memory_desc_t *md);
 
+struct efi_setup_data {
+	u64 fw_vendor;
+	u64 runtime;
+	u64 tables;
+	u64 smbios;
+	u64 reserved[8];
+};
+
+extern u64 efi_setup;
+extern u32 efi_data_len;
+extern void parse_efi_setup(u64 phys_addr, u32 data_len);
+
 #ifdef CONFIG_EFI
 
 static inline bool efi_is_native(void)
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 9c3733c5f8f7..64fe421aab65 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -6,6 +6,7 @@
 #define SETUP_E820_EXT			1
 #define SETUP_DTB			2
 #define SETUP_PCI			3
+#define SETUP_EFI			4
 
 /* ram_size flags */
 #define RAMDISK_IMAGE_START_MASK	0x07FF
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index cb233bc9dee3..24536f7a0ae6 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -447,6 +447,9 @@ static void __init parse_setup_data(void)
 		case SETUP_DTB:
 			add_dtb(pa_data);
 			break;
+		case SETUP_EFI:
+			parse_efi_setup(pa_data, data_len);
+			break;
 		default:
 			break;
 		}
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 74fe7a719508..9965ff403c6e 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -78,6 +78,8 @@ static __initdata efi_config_table_type_t arch_tables[] = {
 
 static void *efi_runtime_map;
 static int nr_efi_runtime_map;
+u64 efi_setup;		/* efi setup_data physical address */
+u32 efi_data_len;	/* efi setup_data payload length */
 
 /*
  * Returns 1 if 'facility' is enabled, 0 otherwise.
@@ -115,7 +117,6 @@ static int __init setup_storage_paranoia(char *arg)
 }
 early_param("efi_no_storage_paranoia", setup_storage_paranoia);
 
-
 static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
 {
 	unsigned long flags;
@@ -494,18 +495,27 @@ static int __init efi_systab_init(void *phys)
 {
 	if (efi_enabled(EFI_64BIT)) {
 		efi_system_table_64_t *systab64;
+		struct efi_setup_data *data = NULL;
 		u64 tmp = 0;
 
+		if (efi_setup) {
+			data = early_memremap(efi_setup, sizeof(*data));
+			if (!data)
+				return -ENOMEM;
+		}
 		systab64 = early_ioremap((unsigned long)phys,
 					 sizeof(*systab64));
 		if (systab64 == NULL) {
 			pr_err("Couldn't map the system table!\n");
+			if (data)
+				early_iounmap(data, sizeof(*data));
 			return -ENOMEM;
 		}
 
 		efi_systab.hdr = systab64->hdr;
-		efi_systab.fw_vendor = systab64->fw_vendor;
-		tmp |= systab64->fw_vendor;
+		efi_systab.fw_vendor = data ? (unsigned long)data->fw_vendor :
+					      systab64->fw_vendor;
+		tmp |= data ? data->fw_vendor : systab64->fw_vendor;
 		efi_systab.fw_revision = systab64->fw_revision;
 		efi_systab.con_in_handle = systab64->con_in_handle;
 		tmp |= systab64->con_in_handle;
@@ -519,15 +529,20 @@ static int __init efi_systab_init(void *phys)
 		tmp |= systab64->stderr_handle;
 		efi_systab.stderr = systab64->stderr;
 		tmp |= systab64->stderr;
-		efi_systab.runtime = (void *)(unsigned long)systab64->runtime;
-		tmp |= systab64->runtime;
+		efi_systab.runtime = data ?
+				     (void *)(unsigned long)data->runtime :
+				     (void *)(unsigned long)systab64->runtime;
+		tmp |= data ? data->runtime : systab64->runtime;
 		efi_systab.boottime = (void *)(unsigned long)systab64->boottime;
 		tmp |= systab64->boottime;
 		efi_systab.nr_tables = systab64->nr_tables;
-		efi_systab.tables = systab64->tables;
-		tmp |= systab64->tables;
+		efi_systab.tables = data ? (unsigned long)data->tables :
+					   systab64->tables;
+		tmp |= data ? data->tables : systab64->tables;
 
 		early_iounmap(systab64, sizeof(*systab64));
+		if (data)
+			early_iounmap(data, sizeof(*data));
 #ifdef CONFIG_X86_32
 		if (tmp >> 32) {
 			pr_err("EFI data located above 4GB, disabling EFI.\n");
@@ -631,6 +646,71 @@ static int __init efi_memmap_init(void)
 	return 0;
 }
 
+/*
+ * A number of config table entries get remapped to virtual addresses
+ * after entering EFI virtual mode. However, the kexec kernel requires
+ * their physical addresses therefore we pass them via setup_data and
+ * correct those entries to their respective physical addresses here.
+ *
+ * Currently only handles smbios which is necessary for some firmware
+ * implementation.
+ */
+static int __init efi_reuse_config(u64 tables, int nr_tables)
+{
+	int i, sz, ret = 0;
+	void *p, *tablep;
+	struct efi_setup_data *data;
+
+	if (!efi_setup)
+		return 0;
+
+	if (!efi_enabled(EFI_64BIT))
+		return 0;
+
+	data = early_memremap(efi_setup, sizeof(*data));
+	if (!data) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	if (!data->smbios)
+		goto out_memremap;
+
+	sz = sizeof(efi_config_table_64_t);
+
+	p = tablep = early_memremap(tables, nr_tables * sz);
+	if (!p) {
+		pr_err("Could not map Configuration table!\n");
+		ret = -ENOMEM;
+		goto out_memremap;
+	}
+
+	for (i = 0; i < efi.systab->nr_tables; i++) {
+		efi_guid_t guid;
+
+		guid = ((efi_config_table_64_t *)p)->guid;
+
+		if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID))
+			((efi_config_table_64_t *)p)->table = data->smbios;
+		p += sz;
+	}
+	early_iounmap(tablep, nr_tables * sz);
+
+out_memremap:
+	early_iounmap(data, sizeof(*data));
+out:
+	return ret;
+}
+
+static void get_nr_runtime_map(void)
+{
+	if (!efi_setup)
+		return;
+
+	nr_efi_runtime_map = (efi_data_len - sizeof(struct efi_setup_data)) /
+			     sizeof(efi_memory_desc_t);
+}
+
 void __init efi_init(void)
 {
 	efi_char16_t *c16;
@@ -638,6 +718,7 @@ void __init efi_init(void)
 	int i = 0;
 	void *tmp;
 
+	get_nr_runtime_map();
 #ifdef CONFIG_X86_32
 	if (boot_params.efi_info.efi_systab_hi ||
 	    boot_params.efi_info.efi_memmap_hi) {
@@ -676,6 +757,9 @@ void __init efi_init(void)
 		efi.systab->hdr.revision >> 16,
 		efi.systab->hdr.revision & 0xffff, vendor);
 
+	if (efi_reuse_config(efi.systab->tables, efi.systab->nr_tables))
+		return;
+
 	if (efi_config_init(arch_tables))
 		return;
 
@@ -860,6 +944,23 @@ out:
 	return -ENOMEM;
 }
 
+/*
+ * Map efi regions which were passed via setup_data. The virt_addr is a fixed
+ * addr which was used in first kernel of a kexec boot.
+ */
+static void __init efi_map_regions_fixed(void)
+{
+	void *p;
+	efi_memory_desc_t *md;
+
+	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+		md = p;
+		efi_map_region_fixed(md); /* FIXME: add error handling */
+		get_systab_virt_addr(md);
+	}
+
+}
+
 /*
  * Map efi memory ranges for runtime serivce and update new_memmap with virtual
  * addresses.
@@ -914,6 +1015,10 @@ out:
  * so that we're in a different address space when calling a runtime
  * function. For function arguments passing we do copy the PGDs of the
  * kernel page table into ->trampoline_pgd prior to each call.
+ *
+ * Specially for kexec boot, efi runtime maps in previous kernel should
+ * be passed in via setup_data. In that case runtime ranges will be mapped
+ * to the same virtual addresses as the first kernel.
  */
 void __init efi_enter_virtual_mode(void)
 {
@@ -932,12 +1037,15 @@ void __init efi_enter_virtual_mode(void)
 		return;
 	}
 
-	efi_merge_regions();
-
-	new_memmap = efi_map_regions(&count);
-	if (!new_memmap) {
-		pr_err("Error reallocating memory, EFI runtime non-functional!\n");
-		return;
+	if (efi_setup) {
+		efi_map_regions_fixed();
+	} else {
+		efi_merge_regions();
+		new_memmap = efi_map_regions(&count);
+		if (!new_memmap) {
+			pr_err("Error reallocating memory, EFI runtime non-functional!\n");
+			return;
+		}
 	}
 
 	err = save_runtime_map();
@@ -949,16 +1057,18 @@ void __init efi_enter_virtual_mode(void)
 	efi_setup_page_tables();
 	efi_sync_low_kernel_mappings();
 
-	status = phys_efi_set_virtual_address_map(
-		memmap.desc_size * count,
-		memmap.desc_size,
-		memmap.desc_version,
-		(efi_memory_desc_t *)__pa(new_memmap));
-
-	if (status != EFI_SUCCESS) {
-		pr_alert("Unable to switch EFI into virtual mode "
-			 "(status=%lx)!\n", status);
-		panic("EFI call to SetVirtualAddressMap() failed!");
+	if (!efi_setup) {
+		status = phys_efi_set_virtual_address_map(
+			memmap.desc_size * count,
+			memmap.desc_size,
+			memmap.desc_version,
+			(efi_memory_desc_t *)__pa(new_memmap));
+
+		if (status != EFI_SUCCESS) {
+			pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n",
+				 status);
+			panic("EFI call to SetVirtualAddressMap() failed!");
+		}
 	}
 
 	/*
@@ -993,8 +1103,6 @@ void __init efi_enter_virtual_mode(void)
 			 EFI_VARIABLE_BOOTSERVICE_ACCESS |
 			 EFI_VARIABLE_RUNTIME_ACCESS,
 			 0, NULL);
-
-	return;
 }
 
 /*
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 7b3ec6ed99af..249b183cf417 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -48,6 +48,7 @@ void __init efi_map_region(efi_memory_desc_t *md)
 }
 
 void __init efi_map_region_fixed(efi_memory_desc_t *md) {}
+void __init parse_efi_setup(u64 phys_addr, u32 data_len) {}
 
 void efi_call_phys_prelog(void)
 {
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index ff08cb19630b..324b65103851 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -228,3 +228,9 @@ void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
 
 	return (void __iomem *)__va(phys_addr);
 }
+
+void __init parse_efi_setup(u64 phys_addr, u32 data_len)
+{
+	efi_setup = phys_addr + sizeof(struct setup_data);
+	efi_data_len = data_len - sizeof(struct setup_data);
+}
-- 
cgit v1.2.3


From 456a29ddada79198c5965300e04103c40c481f62 Mon Sep 17 00:00:00 2001
From: Dave Young <dyoung@redhat.com>
Date: Fri, 20 Dec 2013 18:02:20 +0800
Subject: x86: Add xloadflags bit for EFI runtime support on kexec

Old kexec-tools can not load new kernels. The reason is kexec-tools does
not fill efi_info in x86 setup header previously, thus EFI failed to
initialize.  In new kexec-tools it will by default to fill efi_info and
pass other EFI required infomation to 2nd kernel so kexec kernel EFI
initialization can succeed finally.

To prevent from breaking userspace, add a new xloadflags bit so
kexec-tools can check the flag and switch to old logic.

Signed-off-by: Dave Young <dyoung@redhat.com>
Acked-by: Borislav Petkov <bp@suse.de>
Tested-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 Documentation/x86/boot.txt            | 3 +++
 arch/x86/boot/header.S                | 9 ++++++++-
 arch/x86/include/uapi/asm/bootparam.h | 1 +
 3 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt
index f4f268c2b826..cb81741d3b0b 100644
--- a/Documentation/x86/boot.txt
+++ b/Documentation/x86/boot.txt
@@ -608,6 +608,9 @@ Protocol:       2.12+
 	- If 1, the kernel supports the 64-bit EFI handoff entry point
           given at handover_offset + 0x200.
 
+  Bit 4 (read): XLF_EFI_KEXEC
+	- If 1, the kernel supports kexec EFI boot with EFI runtime support.
+
 Field name:	cmdline_size
 Type:		read
 Offset/size:	0x238/4
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 9ec06a1f6d61..ec3b8ba68096 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -391,7 +391,14 @@ xloadflags:
 #else
 # define XLF23 0
 #endif
-			.word XLF0 | XLF1 | XLF23
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_EFI) && defined(CONFIG_KEXEC)
+# define XLF4 XLF_EFI_KEXEC
+#else
+# define XLF4 0
+#endif
+
+			.word XLF0 | XLF1 | XLF23 | XLF4
 
 cmdline_size:   .long   COMMAND_LINE_SIZE-1     #length of the command line,
                                                 #added with boot protocol
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 64fe421aab65..225b0988043a 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -24,6 +24,7 @@
 #define XLF_CAN_BE_LOADED_ABOVE_4G	(1<<1)
 #define XLF_EFI_HANDOVER_32		(1<<2)
 #define XLF_EFI_HANDOVER_64		(1<<3)
+#define XLF_EFI_KEXEC			(1<<4)
 
 #ifndef __ASSEMBLY__
 
-- 
cgit v1.2.3


From 5039e316dde3fb71c79e95e97c5bca8e4724d8f2 Mon Sep 17 00:00:00 2001
From: Dave Young <dyoung@redhat.com>
Date: Fri, 20 Dec 2013 18:02:21 +0800
Subject: x86: Export x86 boot_params to sysfs

kexec-tools use boot_params for getting the 1st kernel hardware_subarch,
the kexec kernel EFI runtime support also needs to read the old efi_info
from boot_params. Currently it exists in debugfs which is not a good
place for such infomation. Per HPA, we should avoid "sploit debugfs".

In this patch /sys/kernel/boot_params are exported, also the setup_data is
exported as a subdirectory. kexec-tools is using debugfs for hardware_subarch
for a long time now so we're not removing it yet.

Structure is like below:

/sys/kernel/boot_params
|__ data                /* boot_params in binary*/
|__ setup_data
|   |__ 0               /* the first setup_data node */
|   |   |__ data        /* setup_data node 0 in binary*/
|   |   |__ type        /* setup_data type of setup_data node 0, hex string */
[snip]
|__ version             /* boot protocal version (in hex, "0x" prefixed)*/

Signed-off-by: Dave Young <dyoung@redhat.com>
Acked-by: Borislav Petkov <bp@suse.de>
Tested-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 Documentation/ABI/testing/sysfs-kernel-boot_params |  38 +++
 arch/x86/kernel/Makefile                           |   1 +
 arch/x86/kernel/ksysfs.c                           | 339 +++++++++++++++++++++
 3 files changed, 378 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-kernel-boot_params
 create mode 100644 arch/x86/kernel/ksysfs.c

(limited to 'arch')

diff --git a/Documentation/ABI/testing/sysfs-kernel-boot_params b/Documentation/ABI/testing/sysfs-kernel-boot_params
new file mode 100644
index 000000000000..eca38ce2852d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-boot_params
@@ -0,0 +1,38 @@
+What:		/sys/kernel/boot_params
+Date:		December 2013
+Contact:	Dave Young <dyoung@redhat.com>
+Description:	The /sys/kernel/boot_params directory contains two
+		files: "data" and "version" and one subdirectory "setup_data".
+		It is used to export the kernel boot parameters of an x86
+		platform to userspace for kexec and debugging purpose.
+
+		If there's no setup_data in boot_params the subdirectory will
+		not be created.
+
+		"data" file is the binary representation of struct boot_params.
+
+		"version" file is the string representation of boot
+		protocol version.
+
+		"setup_data" subdirectory contains the setup_data data
+		structure in boot_params. setup_data is maintained in kernel
+		as a link list. In "setup_data" subdirectory there's one
+		subdirectory for each link list node named with the number
+		of the list nodes. The list node subdirectory contains two
+		files "type" and "data". "type" file is the string
+		representation of setup_data type. "data" file is the binary
+		representation of setup_data payload.
+
+		The whole boot_params directory structure is like below:
+		/sys/kernel/boot_params
+		|__ data
+		|__ setup_data
+		|   |__ 0
+		|   |   |__ data
+		|   |   |__ type
+		|   |__ 1
+		|       |__ data
+		|       |__ type
+		|__ version
+
+Users:		Kexec
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 9b0a34e2cd79..510cca5c5390 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -29,6 +29,7 @@ obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-y			+= syscall_$(BITS).o
 obj-$(CONFIG_X86_64)	+= vsyscall_64.o
 obj-$(CONFIG_X86_64)	+= vsyscall_emu_64.o
+obj-$(CONFIG_SYSFS)	+= ksysfs.o
 obj-y			+= bootflag.o e820.o
 obj-y			+= pci-dma.o quirks.o topology.o kdebugfs.o
 obj-y			+= alternative.o i8253.o pci-nommu.o hw_breakpoint.o
diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
new file mode 100644
index 000000000000..eb53d153f307
--- /dev/null
+++ b/arch/x86/kernel/ksysfs.c
@@ -0,0 +1,339 @@
+/*
+ * Architecture specific sysfs attributes in /sys/kernel
+ *
+ * Copyright (C) 2007, Intel Corp.
+ *      Huang Ying <ying.huang@intel.com>
+ * Copyright (C) 2013, 2013 Red Hat, Inc.
+ *      Dave Young <dyoung@redhat.com>
+ *
+ * This file is released under the GPLv2
+ */
+
+#include <linux/kobject.h>
+#include <linux/string.h>
+#include <linux/sysfs.h>
+#include <linux/init.h>
+#include <linux/stat.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+
+#include <asm/setup.h>
+
+static ssize_t version_show(struct kobject *kobj,
+			    struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "0x%04x\n", boot_params.hdr.version);
+}
+
+static struct kobj_attribute boot_params_version_attr = __ATTR_RO(version);
+
+static ssize_t boot_params_data_read(struct file *fp, struct kobject *kobj,
+				     struct bin_attribute *bin_attr,
+				     char *buf, loff_t off, size_t count)
+{
+	memcpy(buf, (void *)&boot_params + off, count);
+	return count;
+}
+
+static struct bin_attribute boot_params_data_attr = {
+	.attr = {
+		.name = "data",
+		.mode = S_IRUGO,
+	},
+	.read = boot_params_data_read,
+	.size = sizeof(boot_params),
+};
+
+static struct attribute *boot_params_version_attrs[] = {
+	&boot_params_version_attr.attr,
+	NULL,
+};
+
+static struct bin_attribute *boot_params_data_attrs[] = {
+	&boot_params_data_attr,
+	NULL,
+};
+
+static struct attribute_group boot_params_attr_group = {
+	.attrs = boot_params_version_attrs,
+	.bin_attrs = boot_params_data_attrs,
+};
+
+static int kobj_to_setup_data_nr(struct kobject *kobj, int *nr)
+{
+	const char *name;
+
+	name = kobject_name(kobj);
+	return kstrtoint(name, 10, nr);
+}
+
+static int get_setup_data_paddr(int nr, u64 *paddr)
+{
+	int i = 0;
+	struct setup_data *data;
+	u64 pa_data = boot_params.hdr.setup_data;
+
+	while (pa_data) {
+		if (nr == i) {
+			*paddr = pa_data;
+			return 0;
+		}
+		data = ioremap_cache(pa_data, sizeof(*data));
+		if (!data)
+			return -ENOMEM;
+
+		pa_data = data->next;
+		iounmap(data);
+		i++;
+	}
+	return -EINVAL;
+}
+
+static int __init get_setup_data_size(int nr, size_t *size)
+{
+	int i = 0;
+	struct setup_data *data;
+	u64 pa_data = boot_params.hdr.setup_data;
+
+	while (pa_data) {
+		data = ioremap_cache(pa_data, sizeof(*data));
+		if (!data)
+			return -ENOMEM;
+		if (nr == i) {
+			*size = data->len;
+			iounmap(data);
+			return 0;
+		}
+
+		pa_data = data->next;
+		iounmap(data);
+		i++;
+	}
+	return -EINVAL;
+}
+
+static ssize_t type_show(struct kobject *kobj,
+			 struct kobj_attribute *attr, char *buf)
+{
+	int nr, ret;
+	u64 paddr;
+	struct setup_data *data;
+
+	ret = kobj_to_setup_data_nr(kobj, &nr);
+	if (ret)
+		return ret;
+
+	ret = get_setup_data_paddr(nr, &paddr);
+	if (ret)
+		return ret;
+	data = ioremap_cache(paddr, sizeof(*data));
+	if (!data)
+		return -ENOMEM;
+
+	ret = sprintf(buf, "0x%x\n", data->type);
+	iounmap(data);
+	return ret;
+}
+
+static ssize_t setup_data_data_read(struct file *fp,
+				    struct kobject *kobj,
+				    struct bin_attribute *bin_attr,
+				    char *buf,
+				    loff_t off, size_t count)
+{
+	int nr, ret = 0;
+	u64 paddr;
+	struct setup_data *data;
+	void *p;
+
+	ret = kobj_to_setup_data_nr(kobj, &nr);
+	if (ret)
+		return ret;
+
+	ret = get_setup_data_paddr(nr, &paddr);
+	if (ret)
+		return ret;
+	data = ioremap_cache(paddr, sizeof(*data));
+	if (!data)
+		return -ENOMEM;
+
+	if (off > data->len) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (count > data->len - off)
+		count = data->len - off;
+
+	if (!count)
+		goto out;
+
+	ret = count;
+	p = ioremap_cache(paddr + sizeof(*data), data->len);
+	if (!p) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	memcpy(buf, p + off, count);
+	iounmap(p);
+out:
+	iounmap(data);
+	return ret;
+}
+
+static struct kobj_attribute type_attr = __ATTR_RO(type);
+
+static struct bin_attribute data_attr = {
+	.attr = {
+		.name = "data",
+		.mode = S_IRUGO,
+	},
+	.read = setup_data_data_read,
+};
+
+static struct attribute *setup_data_type_attrs[] = {
+	&type_attr.attr,
+	NULL,
+};
+
+static struct bin_attribute *setup_data_data_attrs[] = {
+	&data_attr,
+	NULL,
+};
+
+static struct attribute_group setup_data_attr_group = {
+	.attrs = setup_data_type_attrs,
+	.bin_attrs = setup_data_data_attrs,
+};
+
+static int __init create_setup_data_node(struct kobject *parent,
+					 struct kobject **kobjp, int nr)
+{
+	int ret = 0;
+	size_t size;
+	struct kobject *kobj;
+	char name[16]; /* should be enough for setup_data nodes numbers */
+	snprintf(name, 16, "%d", nr);
+
+	kobj = kobject_create_and_add(name, parent);
+	if (!kobj)
+		return -ENOMEM;
+
+	ret = get_setup_data_size(nr, &size);
+	if (ret)
+		goto out_kobj;
+
+	data_attr.size = size;
+	ret = sysfs_create_group(kobj, &setup_data_attr_group);
+	if (ret)
+		goto out_kobj;
+	*kobjp = kobj;
+
+	return 0;
+out_kobj:
+	kobject_put(kobj);
+	return ret;
+}
+
+static void __init cleanup_setup_data_node(struct kobject *kobj)
+{
+	sysfs_remove_group(kobj, &setup_data_attr_group);
+	kobject_put(kobj);
+}
+
+static int __init get_setup_data_total_num(u64 pa_data, int *nr)
+{
+	int ret = 0;
+	struct setup_data *data;
+
+	*nr = 0;
+	while (pa_data) {
+		*nr += 1;
+		data = ioremap_cache(pa_data, sizeof(*data));
+		if (!data) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		pa_data = data->next;
+		iounmap(data);
+	}
+
+out:
+	return ret;
+}
+
+static int __init create_setup_data_nodes(struct kobject *parent)
+{
+	struct kobject *setup_data_kobj, **kobjp;
+	u64 pa_data;
+	int i, j, nr, ret = 0;
+
+	pa_data = boot_params.hdr.setup_data;
+	if (!pa_data)
+		return 0;
+
+	setup_data_kobj = kobject_create_and_add("setup_data", parent);
+	if (!setup_data_kobj) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = get_setup_data_total_num(pa_data, &nr);
+	if (ret)
+		goto out_setup_data_kobj;
+
+	kobjp = kmalloc(sizeof(*kobjp) * nr, GFP_KERNEL);
+	if (!kobjp) {
+		ret = -ENOMEM;
+		goto out_setup_data_kobj;
+	}
+
+	for (i = 0; i < nr; i++) {
+		ret = create_setup_data_node(setup_data_kobj, kobjp + i, i);
+		if (ret)
+			goto out_clean_nodes;
+	}
+
+	kfree(kobjp);
+	return 0;
+
+out_clean_nodes:
+	for (j = i - 1; j > 0; j--)
+		cleanup_setup_data_node(*(kobjp + j));
+	kfree(kobjp);
+out_setup_data_kobj:
+	kobject_put(setup_data_kobj);
+out:
+	return ret;
+}
+
+static int __init boot_params_ksysfs_init(void)
+{
+	int ret;
+	struct kobject *boot_params_kobj;
+
+	boot_params_kobj = kobject_create_and_add("boot_params",
+						  kernel_kobj);
+	if (!boot_params_kobj) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = sysfs_create_group(boot_params_kobj, &boot_params_attr_group);
+	if (ret)
+		goto out_boot_params_kobj;
+
+	ret = create_setup_data_nodes(boot_params_kobj);
+	if (ret)
+		goto out_create_group;
+
+	return 0;
+out_create_group:
+	sysfs_remove_group(boot_params_kobj, &boot_params_attr_group);
+out_boot_params_kobj:
+	kobject_put(boot_params_kobj);
+out:
+	return ret;
+}
+
+arch_initcall(boot_params_ksysfs_init);
-- 
cgit v1.2.3


From 77ea8c948953a90401e436e7c05973b2d5529804 Mon Sep 17 00:00:00 2001
From: Dave Young <dyoung@redhat.com>
Date: Fri, 20 Dec 2013 18:02:22 +0800
Subject: x86: Reserve setup_data ranges late after parsing memmap cmdline

Currently e820_reserve_setup_data() is called before parsing early
params, it works in normal case. But for memmap=exactmap, the final
memory ranges are created after parsing memmap= cmdline params, so the
previous e820_reserve_setup_data() has no effect. For example,
setup_data ranges will still be marked as normal system ram, thus when
later sysfs driver ioremap them kernel will warn about mapping normal
ram.

This patch fix it by moving the e820_reserve_setup_data() callback after
parsing early params so they can be set as reserved ranges and later
ioremap will be fine with it.

Signed-off-by: Dave Young <dyoung@redhat.com>
Acked-by: Borislav Petkov <bp@suse.de>
Tested-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/kernel/setup.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 24536f7a0ae6..be4b456e444b 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -927,8 +927,6 @@ void __init setup_arch(char **cmdline_p)
 	iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
 	setup_memory_map();
 	parse_setup_data();
-	/* update the e820_saved too */
-	e820_reserve_setup_data();
 
 	copy_edd();
 
@@ -990,6 +988,8 @@ void __init setup_arch(char **cmdline_p)
 		early_dump_pci_devices();
 #endif
 
+	/* update the e820_saved too */
+	e820_reserve_setup_data();
 	finish_e820_parsing();
 
 	if (efi_enabled(EFI_BOOT))
-- 
cgit v1.2.3


From 518548abd61808ea1e31614ccbdae34d3c32dfa4 Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt.fleming@intel.com>
Date: Sat, 21 Dec 2013 16:09:46 +0000
Subject: x86/efi: Delete superfluous global variables

There's no need to save the runtime map details in global variables, the
values are only required to pass to efi_runtime_map_setup().

And because 'nr_efi_runtime_map' isn't needed, get_nr_runtime_map() can
be deleted along with 'efi_data_len'.

Cc: Dave Young <dyoung@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/platform/efi/efi.c    | 18 +-----------------
 arch/x86/platform/efi/efi_64.c |  1 -
 2 files changed, 1 insertion(+), 18 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 9965ff403c6e..7ed3ecfde98a 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -76,10 +76,7 @@ static __initdata efi_config_table_type_t arch_tables[] = {
 	{NULL_GUID, NULL, NULL},
 };
 
-static void *efi_runtime_map;
-static int nr_efi_runtime_map;
 u64 efi_setup;		/* efi setup_data physical address */
-u32 efi_data_len;	/* efi setup_data payload length */
 
 /*
  * Returns 1 if 'facility' is enabled, 0 otherwise.
@@ -702,15 +699,6 @@ out:
 	return ret;
 }
 
-static void get_nr_runtime_map(void)
-{
-	if (!efi_setup)
-		return;
-
-	nr_efi_runtime_map = (efi_data_len - sizeof(struct efi_setup_data)) /
-			     sizeof(efi_memory_desc_t);
-}
-
 void __init efi_init(void)
 {
 	efi_char16_t *c16;
@@ -718,7 +706,6 @@ void __init efi_init(void)
 	int i = 0;
 	void *tmp;
 
-	get_nr_runtime_map();
 #ifdef CONFIG_X86_32
 	if (boot_params.efi_info.efi_systab_hi ||
 	    boot_params.efi_info.efi_memmap_hi) {
@@ -933,10 +920,7 @@ static int __init save_runtime_map(void)
 		count++;
 	}
 
-	efi_runtime_map = q;
-	nr_efi_runtime_map = count;
-	efi_runtime_map_setup(efi_runtime_map, nr_efi_runtime_map,
-			      boot_params.efi_info.efi_memdesc_size);
+	efi_runtime_map_setup(q, count, memmap.desc_size);
 
 	return 0;
 out:
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 324b65103851..6284f158a47d 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -232,5 +232,4 @@ void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
 void __init parse_efi_setup(u64 phys_addr, u32 data_len)
 {
 	efi_setup = phys_addr + sizeof(struct setup_data);
-	efi_data_len = data_len - sizeof(struct setup_data);
 }
-- 
cgit v1.2.3


From 90ff5d688e61f49f23545ffab6228bd7e87e6dc7 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Mon, 16 Dec 2013 15:12:43 +1100
Subject: powerpc: Fix bad stack check in exception entry

In EXCEPTION_PROLOG_COMMON() we check to see if the stack pointer (r1)
is valid when coming from the kernel.  If it's not valid, we die but
with a nice oops message.

Currently we allocate a stack frame (subtract INT_FRAME_SIZE) before we
check to see if the stack pointer is negative.  Unfortunately, this
won't detect a bad stack where r1 is less than INT_FRAME_SIZE.

This patch fixes the check to compare the modified r1 with
-INT_FRAME_SIZE.  With this, bad kernel stack pointers (including NULL
pointers) are correctly detected again.

Kudos to Paulus for finding this.

Signed-off-by: Michael Neuling <mikey@neuling.org>
cc: stable@vger.kernel.org
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/exception-64s.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 894662a5d4d5..243ce69ad685 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -284,7 +284,7 @@ do_kvm_##n:								\
 	subi	r1,r1,INT_FRAME_SIZE;	/* alloc frame on kernel stack	*/ \
 	beq-	1f;							   \
 	ld	r1,PACAKSAVE(r13);	/* kernel stack to use		*/ \
-1:	cmpdi	cr1,r1,0;		/* check if r1 is in userspace	*/ \
+1:	cmpdi	cr1,r1,-INT_FRAME_SIZE;	/* check if r1 is in userspace	*/ \
 	blt+	cr1,3f;			/* abort if it is		*/ \
 	li	r1,(n);			/* will be reloaded later	*/ \
 	sth	r1,PACA_TRAP_SAVE(r13);					   \
-- 
cgit v1.2.3


From e8a00ad5e238421ded856ea39f692b94c2d324eb Mon Sep 17 00:00:00 2001
From: Rajesh B Prathipati <rprathip@linux.vnet.ibm.com>
Date: Mon, 16 Dec 2013 18:58:22 +1100
Subject: powerpc: Make unaligned accesses endian-safe for powerpc

The generic put_unaligned/get_unaligned macros were made endian-safe by
calling the appropriate endian dependent macros based on the endian type
of the powerpc processor.

Signed-off-by: Rajesh B Prathipati <rprathip@linux.vnet.ibm.com>
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/unaligned.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/unaligned.h b/arch/powerpc/include/asm/unaligned.h
index 5f1b1e3c2137..8296381ae432 100644
--- a/arch/powerpc/include/asm/unaligned.h
+++ b/arch/powerpc/include/asm/unaligned.h
@@ -4,13 +4,18 @@
 #ifdef __KERNEL__
 
 /*
- * The PowerPC can do unaligned accesses itself in big endian mode.
+ * The PowerPC can do unaligned accesses itself based on its endian mode.
  */
 #include <linux/unaligned/access_ok.h>
 #include <linux/unaligned/generic.h>
 
+#ifdef __LITTLE_ENDIAN__
+#define get_unaligned	__get_unaligned_le
+#define put_unaligned	__put_unaligned_le
+#else
 #define get_unaligned	__get_unaligned_be
 #define put_unaligned	__put_unaligned_be
+#endif
 
 #endif	/* __KERNEL__ */
 #endif	/* _ASM_POWERPC_UNALIGNED_H */
-- 
cgit v1.2.3


From 20151169f1de4b170368fdb574024027620d0d49 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 18 Dec 2013 09:29:57 +1100
Subject: powerpc: Make 64-bit non-VMX __copy_tofrom_user bi-endian

The powerpc 64-bit __copy_tofrom_user() function uses shifts to handle
unaligned invocations.  However, these shifts were designed for
big-endian systems: On little-endian systems, they must shift in the
opposite direction.

This commit relies on the C preprocessor to insert the correct shifts
into the assembly code.

[ This is a rare but nasty LE issue. Most of the time we use the POWER7
optimised __copy_tofrom_user_power7 loop, but when it hits an exception
we fall back to the base __copy_tofrom_user loop. - Anton ]

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/lib/copyuser_64.S | 53 ++++++++++++++++++++++++++++++------------
 1 file changed, 38 insertions(+), 15 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
index d73a59014900..596a285c0755 100644
--- a/arch/powerpc/lib/copyuser_64.S
+++ b/arch/powerpc/lib/copyuser_64.S
@@ -9,6 +9,14 @@
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
 
+#ifdef __BIG_ENDIAN__
+#define sLd sld		/* Shift towards low-numbered address. */
+#define sHd srd		/* Shift towards high-numbered address. */
+#else
+#define sLd srd		/* Shift towards low-numbered address. */
+#define sHd sld		/* Shift towards high-numbered address. */
+#endif
+
 	.align	7
 _GLOBAL(__copy_tofrom_user)
 BEGIN_FTR_SECTION
@@ -118,10 +126,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
 
 24:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
 25:	ld	r0,8(r4)
-	sld	r6,r9,r10
+	sLd	r6,r9,r10
 26:	ldu	r9,16(r4)
-	srd	r7,r0,r11
-	sld	r8,r0,r10
+	sHd	r7,r0,r11
+	sLd	r8,r0,r10
 	or	r7,r7,r6
 	blt	cr6,79f
 27:	ld	r0,8(r4)
@@ -129,35 +137,35 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
 
 28:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
 29:	ldu	r9,8(r4)
-	sld	r8,r0,r10
+	sLd	r8,r0,r10
 	addi	r3,r3,-8
 	blt	cr6,5f
 30:	ld	r0,8(r4)
-	srd	r12,r9,r11
-	sld	r6,r9,r10
+	sHd	r12,r9,r11
+	sLd	r6,r9,r10
 31:	ldu	r9,16(r4)
 	or	r12,r8,r12
-	srd	r7,r0,r11
-	sld	r8,r0,r10
+	sHd	r7,r0,r11
+	sLd	r8,r0,r10
 	addi	r3,r3,16
 	beq	cr6,78f
 
 1:	or	r7,r7,r6
 32:	ld	r0,8(r4)
 76:	std	r12,8(r3)
-2:	srd	r12,r9,r11
-	sld	r6,r9,r10
+2:	sHd	r12,r9,r11
+	sLd	r6,r9,r10
 33:	ldu	r9,16(r4)
 	or	r12,r8,r12
 77:	stdu	r7,16(r3)
-	srd	r7,r0,r11
-	sld	r8,r0,r10
+	sHd	r7,r0,r11
+	sLd	r8,r0,r10
 	bdnz	1b
 
 78:	std	r12,8(r3)
 	or	r7,r7,r6
 79:	std	r7,16(r3)
-5:	srd	r12,r9,r11
+5:	sHd	r12,r9,r11
 	or	r12,r8,r12
 80:	std	r12,24(r3)
 	bne	6f
@@ -165,23 +173,38 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
 	blr
 6:	cmpwi	cr1,r5,8
 	addi	r3,r3,32
-	sld	r9,r9,r10
+	sLd	r9,r9,r10
 	ble	cr1,7f
 34:	ld	r0,8(r4)
-	srd	r7,r0,r11
+	sHd	r7,r0,r11
 	or	r9,r7,r9
 7:
 	bf	cr7*4+1,1f
+#ifdef __BIG_ENDIAN__
 	rotldi	r9,r9,32
+#endif
 94:	stw	r9,0(r3)
+#ifdef __LITTLE_ENDIAN__
+	rotrdi	r9,r9,32
+#endif
 	addi	r3,r3,4
 1:	bf	cr7*4+2,2f
+#ifdef __BIG_ENDIAN__
 	rotldi	r9,r9,16
+#endif
 95:	sth	r9,0(r3)
+#ifdef __LITTLE_ENDIAN__
+	rotrdi	r9,r9,16
+#endif
 	addi	r3,r3,2
 2:	bf	cr7*4+3,3f
+#ifdef __BIG_ENDIAN__
 	rotldi	r9,r9,8
+#endif
 96:	stb	r9,0(r3)
+#ifdef __LITTLE_ENDIAN__
+	rotrdi	r9,r9,8
+#endif
 3:	li	r3,0
 	blr
 
-- 
cgit v1.2.3


From 20acebdfaec3e15d8a27ab25bf4a2f91b2afa757 Mon Sep 17 00:00:00 2001
From: Brian W Hart <hartb@linux.vnet.ibm.com>
Date: Thu, 19 Dec 2013 17:14:07 -0600
Subject: powernv/eeh: Fix possible buffer overrun in ioda_eeh_phb_diag()

PHB diagnostic buffer may be smaller than PAGE_SIZE, especially when
PAGE_SIZE > 4KB.

Signed-off-by: Brian W Hart <hartb@linux.vnet.ibm.com>
Acked-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/powernv/eeh-ioda.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index 02245cee7818..8184ef5ccb1a 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -820,14 +820,15 @@ static void ioda_eeh_phb_diag(struct pci_controller *hose)
 	struct OpalIoPhbErrorCommon *common;
 	long rc;
 
-	common = (struct OpalIoPhbErrorCommon *)phb->diag.blob;
-	rc = opal_pci_get_phb_diag_data2(phb->opal_id, common, PAGE_SIZE);
+	rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
+					 PNV_PCI_DIAG_BUF_SIZE);
 	if (rc != OPAL_SUCCESS) {
 		pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n",
 			    __func__, hose->global_number, rc);
 		return;
 	}
 
+	common = (struct OpalIoPhbErrorCommon *)phb->diag.blob;
 	switch (common->ioType) {
 	case OPAL_PHB_ERROR_DATA_TYPE_P7IOC:
 		ioda_eeh_p7ioc_phb_diag(hose, common);
-- 
cgit v1.2.3


From ca1de5deb782e1636ed5b898e215a8840ae39230 Mon Sep 17 00:00:00 2001
From: Brian W Hart <hartb@linux.vnet.ibm.com>
Date: Fri, 20 Dec 2013 13:06:01 -0600
Subject: powernv/eeh: Add buffer for P7IOC hub error data

Prevent ioda_eeh_hub_diag() from clobbering itself when called by supplying
a per-PHB buffer for P7IOC hub diagnostic data.  Take care to inform OPAL of
the correct size for the buffer.

[Small style change to the use of sizeof -- BenH]

Signed-off-by: Brian W Hart <hartb@linux.vnet.ibm.com>
Acked-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/powernv/eeh-ioda.c | 15 ++-------------
 arch/powerpc/platforms/powernv/pci.h      |  4 +++-
 2 files changed, 5 insertions(+), 14 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index 8184ef5ccb1a..d7ddcee7feb8 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -36,7 +36,6 @@
 #include "powernv.h"
 #include "pci.h"
 
-static char *hub_diag = NULL;
 static int ioda_eeh_nb_init = 0;
 
 static int ioda_eeh_event(struct notifier_block *nb,
@@ -140,15 +139,6 @@ static int ioda_eeh_post_init(struct pci_controller *hose)
 		ioda_eeh_nb_init = 1;
 	}
 
-	/* We needn't HUB diag-data on PHB3 */
-	if (phb->type == PNV_PHB_IODA1 && !hub_diag) {
-		hub_diag = (char *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
-		if (!hub_diag) {
-			pr_err("%s: Out of memory !\n", __func__);
-			return -ENOMEM;
-		}
-	}
-
 #ifdef CONFIG_DEBUG_FS
 	if (phb->dbgfs) {
 		debugfs_create_file("err_injct_outbound", 0600,
@@ -633,11 +623,10 @@ static void ioda_eeh_hub_diag_common(struct OpalIoP7IOCErrorData *data)
 static void ioda_eeh_hub_diag(struct pci_controller *hose)
 {
 	struct pnv_phb *phb = hose->private_data;
-	struct OpalIoP7IOCErrorData *data;
+	struct OpalIoP7IOCErrorData *data = &phb->diag.hub_diag;
 	long rc;
 
-	data = (struct OpalIoP7IOCErrorData *)ioda_eeh_hub_diag;
-	rc = opal_pci_get_hub_diag_data(phb->hub_id, data, PAGE_SIZE);
+	rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data));
 	if (rc != OPAL_SUCCESS) {
 		pr_warning("%s: Failed to get HUB#%llx diag-data (%ld)\n",
 			   __func__, phb->hub_id, rc);
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 911c24ef033e..1ed8d5f40f5a 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -172,11 +172,13 @@ struct pnv_phb {
 		} ioda;
 	};
 
-	/* PHB status structure */
+	/* PHB and hub status structure */
 	union {
 		unsigned char			blob[PNV_PCI_DIAG_BUF_SIZE];
 		struct OpalIoP7IOCPhbErrorData	p7ioc;
+		struct OpalIoP7IOCErrorData 	hub_diag;
 	} diag;
+
 };
 
 extern struct pci_ops pnv_pci_ops;
-- 
cgit v1.2.3


From 286e4f90a72c0b0621dde0294af6ed4b0baddabb Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Mon, 23 Dec 2013 12:19:51 +1100
Subject: powerpc: Align p_end

p_end is an 8 byte value embedded in the text section. This means it
is only 4 byte aligned when it should be 8 byte aligned. Fix this
by adding an explicit alignment.

This fixes an issue where POWER7 little endian builds with
CONFIG_RELOCATABLE=y fail to boot.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/head_64.S | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 2ae41aba4053..fad2abdcbdfe 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -470,6 +470,7 @@ _STATIC(__after_prom_start)
 	mtctr	r8
 	bctr
 
+.balign 8
 p_end:	.llong	_end - _stext
 
 4:	/* Now copy the rest of the kernel up to _end */
-- 
cgit v1.2.3


From 7d4151b5098fb0bf7f6f8d1156e1ab9d83260580 Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Sat, 28 Dec 2013 13:01:47 -0800
Subject: powerpc: Fix alignment of secondary cpu spin vars

Commit 5c0484e25ec0 ('powerpc: Endian safe trampoline') resulted in
losing proper alignment of the spinlock variables used when booting
secondary CPUs, causing some quite odd issues with failing to boot on
PA Semi-based systems.

This showed itself on ppc64_defconfig, but not on pasemi_defconfig,
so it had gone unnoticed when I initially tested the LE patch set.

Fix is to add explicit alignment instead of relying on good luck. :)

[ It appears that there is a different issue with PA Semi systems
  however this fix is definitely correct so applying anyway -- BenH
]

Fixes: 5c0484e25ec0 ('powerpc: Endian safe trampoline')
Reported-by: Christian Zigotzky <chzigotzky@xenosoft.de>
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=67811
Signed-off-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/head_64.S | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index fad2abdcbdfe..4f0946de2d5c 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -80,6 +80,7 @@ END_FTR_SECTION(0, 1)
 	 * of the function that the cpu should jump to to continue
 	 * initialization.
 	 */
+	.balign 8
 	.globl  __secondary_hold_spinloop
 __secondary_hold_spinloop:
 	.llong	0x0
-- 
cgit v1.2.3


From fcf2f402937a6696f6fa2a1aa882c5075e5fac34 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Wed, 18 Dec 2013 16:46:02 +0100
Subject: s390/pci: obtain function handle in hotplug notifier

When using the CLP interface to enable or disable a pci device a
valid function handle needs to be delivered. So far our assumption
was that we always have an up-to-date version of the function handle
(since it doesn't change when the device is in use). This assumption
is incorrect if the pci device is enabled or disabled outside of our
control. When we are notified about such a change we already receive
the new function handle. Just use it.

Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/pci/pci_event.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 800f064b0da7..069607209a30 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -75,6 +75,7 @@ void zpci_event_availability(void *data)
 		if (!zdev || zdev->state == ZPCI_FN_STATE_CONFIGURED)
 			break;
 		zdev->state = ZPCI_FN_STATE_CONFIGURED;
+		zdev->fh = ccdf->fh;
 		ret = zpci_enable_device(zdev);
 		if (ret)
 			break;
@@ -101,6 +102,7 @@ void zpci_event_availability(void *data)
 		if (pdev)
 			pci_stop_and_remove_bus_device(pdev);
 
+		zdev->fh = ccdf->fh;
 		zpci_disable_device(zdev);
 		zdev->state = ZPCI_FN_STATE_STANDBY;
 		break;
-- 
cgit v1.2.3


From 8777539479abd7b3efeb691685415dc2b057d0e0 Mon Sep 17 00:00:00 2001
From: Abhilash Kesavan <a.kesavan@samsung.com>
Date: Thu, 12 Dec 2013 08:32:02 +0530
Subject: ARM: dts: exynos5250: Fix MDMA0 clock number

Due to incorrect clock specified in MDMA0 node, using MDMA0 controller
could cause system failures, due to wrong clock being controlled. This
patch fixes this by specifying correct clock.

Signed-off-by: Abhilash Kesavan <a.kesavan@samsung.com>
Acked-by: Mike Turquette <mturquette@linaro.org>
[t.figa: Corrected commit message and description.]
Signed-off-by: Tomasz Figa <t.figa@samsung.com>
---
 arch/arm/boot/dts/exynos5250.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi
index 9db5047812f3..177becde7a26 100644
--- a/arch/arm/boot/dts/exynos5250.dtsi
+++ b/arch/arm/boot/dts/exynos5250.dtsi
@@ -559,7 +559,7 @@
 			compatible = "arm,pl330", "arm,primecell";
 			reg = <0x10800000 0x1000>;
 			interrupts = <0 33 0>;
-			clocks = <&clock 271>;
+			clocks = <&clock 346>;
 			clock-names = "apb_pclk";
 			#dma-cells = <1>;
 			#dma-channels = <8>;
-- 
cgit v1.2.3


From 9da4a8d9197e42d532f2e2af072d8219105139b2 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Mon, 9 Dec 2013 11:38:08 +0100
Subject: m68k/defconfig: Disable /sbin/hotplug fork-bomb by default

Cfr. commit 7934779a69f1184f29d786b89e77dd14519bd226 ("Driver-Core: disable
/sbin/hotplug by default").

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/configs/amiga_defconfig    | 1 -
 arch/m68k/configs/apollo_defconfig   | 1 -
 arch/m68k/configs/atari_defconfig    | 1 -
 arch/m68k/configs/bvme6000_defconfig | 1 -
 arch/m68k/configs/hp300_defconfig    | 1 -
 arch/m68k/configs/mac_defconfig      | 1 -
 arch/m68k/configs/multi_defconfig    | 1 -
 arch/m68k/configs/mvme147_defconfig  | 1 -
 arch/m68k/configs/mvme16x_defconfig  | 1 -
 arch/m68k/configs/q40_defconfig      | 1 -
 arch/m68k/configs/sun3_defconfig     | 1 -
 arch/m68k/configs/sun3x_defconfig    | 1 -
 12 files changed, 12 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 1f87c0789b12..1b9531750826 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -198,7 +198,6 @@ CONFIG_ATALK=m
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
 # CONFIG_WIRELESS is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEVTMPFS=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
 # CONFIG_FW_LOADER_USER_HELPER is not set
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index ea5b64794364..bc3b9aef51f4 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -196,7 +196,6 @@ CONFIG_ATALK=m
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
 # CONFIG_WIRELESS is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEVTMPFS=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
 # CONFIG_FW_LOADER_USER_HELPER is not set
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 0c3d280187f2..264c84447075 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -195,7 +195,6 @@ CONFIG_ATALK=m
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
 # CONFIG_WIRELESS is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEVTMPFS=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
 # CONFIG_FW_LOADER_USER_HELPER is not set
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index 9b6b515c5a23..2462c9325b1f 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -194,7 +194,6 @@ CONFIG_ATALK=m
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
 # CONFIG_WIRELESS is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEVTMPFS=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
 # CONFIG_FW_LOADER_USER_HELPER is not set
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index efffa687135f..9e6d58204fbc 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -196,7 +196,6 @@ CONFIG_ATALK=m
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
 # CONFIG_WIRELESS is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEVTMPFS=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
 # CONFIG_FW_LOADER_USER_HELPER is not set
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 73a8565746dc..7f6be05090b9 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -199,7 +199,6 @@ CONFIG_IPDDP_DECAP=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
 # CONFIG_WIRELESS is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEVTMPFS=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
 # CONFIG_FW_LOADER_USER_HELPER is not set
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 19c7939f700f..265abf4b3ae5 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -208,7 +208,6 @@ CONFIG_IPDDP_DECAP=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
 # CONFIG_WIRELESS is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEVTMPFS=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
 # CONFIG_FW_LOADER_USER_HELPER is not set
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 81fab00de63a..6df0acd9c2e5 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -193,7 +193,6 @@ CONFIG_ATALK=m
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
 # CONFIG_WIRELESS is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEVTMPFS=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
 # CONFIG_FW_LOADER_USER_HELPER is not set
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index df0783770611..4108303e2453 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -194,7 +194,6 @@ CONFIG_ATALK=m
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
 # CONFIG_WIRELESS is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEVTMPFS=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
 # CONFIG_FW_LOADER_USER_HELPER is not set
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index f64c8963fa5d..f7bdca099892 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -194,7 +194,6 @@ CONFIG_ATALK=m
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
 # CONFIG_WIRELESS is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEVTMPFS=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
 # CONFIG_FW_LOADER_USER_HELPER is not set
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 99bcc60f82a3..e578dd87a451 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -191,7 +191,6 @@ CONFIG_ATALK=m
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
 # CONFIG_WIRELESS is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEVTMPFS=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
 # CONFIG_FW_LOADER_USER_HELPER is not set
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index 2023a91500f3..f5c91e17b3d7 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -191,7 +191,6 @@ CONFIG_ATALK=m
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
 # CONFIG_WIRELESS is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEVTMPFS=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
 # CONFIG_FW_LOADER_USER_HELPER is not set
-- 
cgit v1.2.3


From 4e25c0e92f8eaf69bc51d1d523bcb7268e7dd162 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Tue, 10 Dec 2013 11:35:47 +0100
Subject: m68k/mm: Check for mm != NULL in do_page_fault() debug code

When DEBUG is enabled, do_page_fault() may dereference a NULL pointer,
causing recursive bus errors.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/mm/fault.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index eb1d61f68725..c7725f1de05d 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -77,8 +77,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
 
 #ifdef DEBUG
 	printk ("do page fault:\nregs->sr=%#x, regs->pc=%#lx, address=%#lx, %ld, %p\n",
-		regs->sr, regs->pc, address, error_code,
-		current->mm->pgd);
+		regs->sr, regs->pc, address, error_code, mm ? mm->pgd : NULL);
 #endif
 
 	/*
-- 
cgit v1.2.3


From 8e398f638179d5968df41ad8eff8f27684454dc4 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Tue, 10 Dec 2013 12:04:16 +0100
Subject: m68k: Convert arch/m68k/mm/fault.c to pr_*()

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/mm/fault.c | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index c7725f1de05d..2bd7487440c4 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -25,9 +25,8 @@ int send_fault_sig(struct pt_regs *regs)
 	siginfo.si_signo = current->thread.signo;
 	siginfo.si_code = current->thread.code;
 	siginfo.si_addr = (void *)current->thread.faddr;
-#ifdef DEBUG
-	printk("send_fault_sig: %p,%d,%d\n", siginfo.si_addr, siginfo.si_signo, siginfo.si_code);
-#endif
+	pr_debug("send_fault_sig: %p,%d,%d\n", siginfo.si_addr,
+		 siginfo.si_signo, siginfo.si_code);
 
 	if (user_mode(regs)) {
 		force_sig_info(siginfo.si_signo,
@@ -45,10 +44,10 @@ int send_fault_sig(struct pt_regs *regs)
 		 * terminate things with extreme prejudice.
 		 */
 		if ((unsigned long)siginfo.si_addr < PAGE_SIZE)
-			printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
+			pr_alert("Unable to handle kernel NULL pointer dereference");
 		else
-			printk(KERN_ALERT "Unable to handle kernel access");
-		printk(" at virtual address %p\n", siginfo.si_addr);
+			pr_alert("Unable to handle kernel access");
+		pr_cont(" at virtual address %p\n", siginfo.si_addr);
 		die_if_kernel("Oops", regs, 0 /*error_code*/);
 		do_exit(SIGKILL);
 	}
@@ -75,10 +74,8 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
 	int fault;
 	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
-#ifdef DEBUG
-	printk ("do page fault:\nregs->sr=%#x, regs->pc=%#lx, address=%#lx, %ld, %p\n",
+	pr_debug("do page fault:\nregs->sr=%#x, regs->pc=%#lx, address=%#lx, %ld, %p\n",
 		regs->sr, regs->pc, address, error_code, mm ? mm->pgd : NULL);
-#endif
 
 	/*
 	 * If we're in an interrupt or have no user
@@ -117,9 +114,7 @@ retry:
  * we can handle it..
  */
 good_area:
-#ifdef DEBUG
-	printk("do_page_fault: good_area\n");
-#endif
+	pr_debug("do_page_fault: good_area\n");
 	switch (error_code & 3) {
 		default:	/* 3: write, present */
 			/* fall through */
@@ -142,9 +137,7 @@ good_area:
 	 */
 
 	fault = handle_mm_fault(mm, vma, address, flags);
-#ifdef DEBUG
-	printk("handle_mm_fault returns %d\n",fault);
-#endif
+	pr_debug("handle_mm_fault returns %d\n", fault);
 
 	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
 		return 0;
-- 
cgit v1.2.3


From 245b815c3faf4a0509948618c35f993160e193a9 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Tue, 10 Dec 2013 11:38:34 +0100
Subject: m68k: Convert arch/m68k/kernel/traps.c to pr_*()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Also fix a few printf-style formats, to get rid of the following compiler
warnings when DEBUG is enabled:

arch/m68k/kernel/traps.c: In function ‘access_error060’:
arch/m68k/kernel/traps.c:166: warning: format ‘%d’ expects type ‘int’, but argument 2 has type ‘long unsigned int’
arch/m68k/kernel/traps.c: In function ‘bus_error030’:
arch/m68k/kernel/traps.c:568: warning: format ‘%#lx’ expects type ‘long unsigned int’, but argument 2 has type ‘void *’
arch/m68k/kernel/traps.c:682: warning: format ‘%#lx’ expects type ‘long unsigned int’, but argument 2 has type ‘void *’

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/kernel/traps.c | 232 +++++++++++++++++++++--------------------------
 1 file changed, 101 insertions(+), 131 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c
index 88fcd8c70e7b..6c9ca24830e9 100644
--- a/arch/m68k/kernel/traps.c
+++ b/arch/m68k/kernel/traps.c
@@ -133,9 +133,7 @@ static inline void access_error060 (struct frame *fp)
 {
 	unsigned long fslw = fp->un.fmt4.pc; /* is really FSLW for access error */
 
-#ifdef DEBUG
-	printk("fslw=%#lx, fa=%#lx\n", fslw, fp->un.fmt4.effaddr);
-#endif
+	pr_debug("fslw=%#lx, fa=%#lx\n", fslw, fp->un.fmt4.effaddr);
 
 	if (fslw & MMU060_BPE) {
 		/* branch prediction error -> clear branch cache */
@@ -162,9 +160,7 @@ static inline void access_error060 (struct frame *fp)
 		}
 		if (fslw & MMU060_W)
 			errorcode |= 2;
-#ifdef DEBUG
-		printk("errorcode = %d\n", errorcode );
-#endif
+		pr_debug("errorcode = %ld\n", errorcode);
 		do_page_fault(&fp->ptregs, addr, errorcode);
 	} else if (fslw & (MMU060_SEE)){
 		/* Software Emulation Error.
@@ -173,8 +169,9 @@ static inline void access_error060 (struct frame *fp)
 		send_fault_sig(&fp->ptregs);
 	} else if (!(fslw & (MMU060_RE|MMU060_WE)) ||
 		   send_fault_sig(&fp->ptregs) > 0) {
-		printk("pc=%#lx, fa=%#lx\n", fp->ptregs.pc, fp->un.fmt4.effaddr);
-		printk( "68060 access error, fslw=%lx\n", fslw );
+		pr_err("pc=%#lx, fa=%#lx\n", fp->ptregs.pc,
+		       fp->un.fmt4.effaddr);
+		pr_err("68060 access error, fslw=%lx\n", fslw);
 		trap_c( fp );
 	}
 }
@@ -225,9 +222,7 @@ static inline int do_040writeback1(unsigned short wbs, unsigned long wba,
 	set_fs(old_fs);
 
 
-#ifdef DEBUG
-	printk("do_040writeback1, res=%d\n",res);
-#endif
+	pr_debug("do_040writeback1, res=%d\n", res);
 
 	return res;
 }
@@ -249,7 +244,7 @@ static inline void do_040writebacks(struct frame *fp)
 	int res = 0;
 #if 0
 	if (fp->un.fmt7.wb1s & WBV_040)
-		printk("access_error040: cannot handle 1st writeback. oops.\n");
+		pr_err("access_error040: cannot handle 1st writeback. oops.\n");
 #endif
 
 	if ((fp->un.fmt7.wb2s & WBV_040) &&
@@ -302,14 +297,12 @@ static inline void access_error040(struct frame *fp)
 	unsigned short ssw = fp->un.fmt7.ssw;
 	unsigned long mmusr;
 
-#ifdef DEBUG
-	printk("ssw=%#x, fa=%#lx\n", ssw, fp->un.fmt7.faddr);
-        printk("wb1s=%#x, wb2s=%#x, wb3s=%#x\n", fp->un.fmt7.wb1s,
+	pr_debug("ssw=%#x, fa=%#lx\n", ssw, fp->un.fmt7.faddr);
+	pr_debug("wb1s=%#x, wb2s=%#x, wb3s=%#x\n", fp->un.fmt7.wb1s,
 		fp->un.fmt7.wb2s, fp->un.fmt7.wb3s);
-	printk ("wb2a=%lx, wb3a=%lx, wb2d=%lx, wb3d=%lx\n",
+	pr_debug("wb2a=%lx, wb3a=%lx, wb2d=%lx, wb3d=%lx\n",
 		fp->un.fmt7.wb2a, fp->un.fmt7.wb3a,
 		fp->un.fmt7.wb2d, fp->un.fmt7.wb3d);
-#endif
 
 	if (ssw & ATC_040) {
 		unsigned long addr = fp->un.fmt7.faddr;
@@ -324,9 +317,7 @@ static inline void access_error040(struct frame *fp)
 
 		/* MMU error, get the MMUSR info for this access */
 		mmusr = probe040(!(ssw & RW_040), addr, ssw);
-#ifdef DEBUG
-		printk("mmusr = %lx\n", mmusr);
-#endif
+		pr_debug("mmusr = %lx\n", mmusr);
 		errorcode = 1;
 		if (!(mmusr & MMU_R_040)) {
 			/* clear the invalid atc entry */
@@ -340,14 +331,10 @@ static inline void access_error040(struct frame *fp)
 			errorcode |= 2;
 
 		if (do_page_fault(&fp->ptregs, addr, errorcode)) {
-#ifdef DEBUG
-			printk("do_page_fault() !=0\n");
-#endif
+			pr_debug("do_page_fault() !=0\n");
 			if (user_mode(&fp->ptregs)){
 				/* delay writebacks after signal delivery */
-#ifdef DEBUG
-			        printk(".. was usermode - return\n");
-#endif
+				pr_debug(".. was usermode - return\n");
 				return;
 			}
 			/* disable writeback into user space from kernel
@@ -355,9 +342,7 @@ static inline void access_error040(struct frame *fp)
                          * the writeback won't do good)
 			 */
 disable_wb:
-#ifdef DEBUG
-			printk(".. disabling wb2\n");
-#endif
+			pr_debug(".. disabling wb2\n");
 			if (fp->un.fmt7.wb2a == fp->un.fmt7.faddr)
 				fp->un.fmt7.wb2s &= ~WBV_040;
 			if (fp->un.fmt7.wb3a == fp->un.fmt7.faddr)
@@ -371,7 +356,7 @@ disable_wb:
 		current->thread.signo = SIGBUS;
 		current->thread.faddr = fp->un.fmt7.faddr;
 		if (send_fault_sig(&fp->ptregs) >= 0)
-			printk("68040 bus error (ssw=%x, faddr=%lx)\n", ssw,
+			pr_err("68040 bus error (ssw=%x, faddr=%lx)\n", ssw,
 			       fp->un.fmt7.faddr);
 		goto disable_wb;
 	}
@@ -394,19 +379,17 @@ static inline void bus_error030 (struct frame *fp)
 	unsigned short ssw = fp->un.fmtb.ssw;
 	extern unsigned long _sun3_map_test_start, _sun3_map_test_end;
 
-#ifdef DEBUG
 	if (ssw & (FC | FB))
-		printk ("Instruction fault at %#010lx\n",
+		pr_debug("Instruction fault at %#010lx\n",
 			ssw & FC ?
 			fp->ptregs.format == 0xa ? fp->ptregs.pc + 2 : fp->un.fmtb.baddr - 2
 			:
 			fp->ptregs.format == 0xa ? fp->ptregs.pc + 4 : fp->un.fmtb.baddr);
 	if (ssw & DF)
-		printk ("Data %s fault at %#010lx in %s (pc=%#lx)\n",
+		pr_debug("Data %s fault at %#010lx in %s (pc=%#lx)\n",
 			ssw & RW ? "read" : "write",
 			fp->un.fmtb.daddr,
 			space_names[ssw & DFC], fp->ptregs.pc);
-#endif
 
 	/*
 	 * Check if this page should be demand-mapped. This needs to go before
@@ -429,7 +412,7 @@ static inline void bus_error030 (struct frame *fp)
 			  return;
 			/* instruction fault or kernel data fault! */
 			if (ssw & (FC | FB))
-				printk ("Instruction fault at %#010lx\n",
+				pr_err("Instruction fault at %#010lx\n",
 					fp->ptregs.pc);
 			if (ssw & DF) {
 				/* was this fault incurred testing bus mappings? */
@@ -439,12 +422,12 @@ static inline void bus_error030 (struct frame *fp)
 					return;
 				}
 
-				printk ("Data %s fault at %#010lx in %s (pc=%#lx)\n",
+				pr_err("Data %s fault at %#010lx in %s (pc=%#lx)\n",
 					ssw & RW ? "read" : "write",
 					fp->un.fmtb.daddr,
 					space_names[ssw & DFC], fp->ptregs.pc);
 			}
-			printk ("BAD KERNEL BUSERR\n");
+			pr_err("BAD KERNEL BUSERR\n");
 
 			die_if_kernel("Oops", &fp->ptregs,0);
 			force_sig(SIGKILL, current);
@@ -473,12 +456,11 @@ static inline void bus_error030 (struct frame *fp)
 		else if (buserr_type & SUN3_BUSERR_INVALID)
 			errorcode = 0x00;
 		else {
-#ifdef DEBUG
-			printk ("*** unexpected busfault type=%#04x\n", buserr_type);
-			printk ("invalid %s access at %#lx from pc %#lx\n",
-				!(ssw & RW) ? "write" : "read", addr,
-				fp->ptregs.pc);
-#endif
+			pr_debug("*** unexpected busfault type=%#04x\n",
+				 buserr_type);
+			pr_debug("invalid %s access at %#lx from pc %#lx\n",
+				 !(ssw & RW) ? "write" : "read", addr,
+				 fp->ptregs.pc);
 			die_if_kernel ("Oops", &fp->ptregs, buserr_type);
 			force_sig (SIGBUS, current);
 			return;
@@ -509,9 +491,7 @@ static inline void bus_error030 (struct frame *fp)
 		if (!mmu_emu_handle_fault(addr, 1, 0))
 			do_page_fault (&fp->ptregs, addr, 0);
        } else {
-#ifdef DEBUG
-		printk ("protection fault on insn access (segv).\n");
-#endif
+		pr_debug("protection fault on insn access (segv).\n");
 		force_sig (SIGSEGV, current);
        }
 }
@@ -525,22 +505,22 @@ static inline void bus_error030 (struct frame *fp)
 	unsigned short ssw = fp->un.fmtb.ssw;
 #ifdef DEBUG
 	unsigned long desc;
+#endif
 
-	printk ("pid = %x  ", current->pid);
-	printk ("SSW=%#06x  ", ssw);
+	pr_debug("pid = %x  ", current->pid);
+	pr_debug("SSW=%#06x  ", ssw);
 
 	if (ssw & (FC | FB))
-		printk ("Instruction fault at %#010lx\n",
+		pr_debug("Instruction fault at %#010lx\n",
 			ssw & FC ?
 			fp->ptregs.format == 0xa ? fp->ptregs.pc + 2 : fp->un.fmtb.baddr - 2
 			:
 			fp->ptregs.format == 0xa ? fp->ptregs.pc + 4 : fp->un.fmtb.baddr);
 	if (ssw & DF)
-		printk ("Data %s fault at %#010lx in %s (pc=%#lx)\n",
+		pr_debug("Data %s fault at %#010lx in %s (pc=%#lx)\n",
 			ssw & RW ? "read" : "write",
 			fp->un.fmtb.daddr,
 			space_names[ssw & DFC], fp->ptregs.pc);
-#endif
 
 	/* ++andreas: If a data fault and an instruction fault happen
 	   at the same time map in both pages.  */
@@ -554,27 +534,23 @@ static inline void bus_error030 (struct frame *fp)
 			      "pmove %%psr,%1"
 			      : "=a&" (desc), "=m" (temp)
 			      : "a" (addr), "d" (ssw));
+		pr_debug("mmusr is %#x for addr %#lx in task %p\n",
+			 temp, addr, current);
+		pr_debug("descriptor address is 0x%p, contents %#lx\n",
+			 __va(desc), *(unsigned long *)__va(desc));
 #else
 		asm volatile ("ptestr %2,%1@,#7\n\t"
 			      "pmove %%psr,%0"
 			      : "=m" (temp) : "a" (addr), "d" (ssw));
 #endif
 		mmusr = temp;
-
-#ifdef DEBUG
-		printk("mmusr is %#x for addr %#lx in task %p\n",
-		       mmusr, addr, current);
-		printk("descriptor address is %#lx, contents %#lx\n",
-		       __va(desc), *(unsigned long *)__va(desc));
-#endif
-
 		errorcode = (mmusr & MMU_I) ? 0 : 1;
 		if (!(ssw & RW) || (ssw & RM))
 			errorcode |= 2;
 
 		if (mmusr & (MMU_I | MMU_WP)) {
 			if (ssw & 4) {
-				printk("Data %s fault at %#010lx in %s (pc=%#lx)\n",
+				pr_err("Data %s fault at %#010lx in %s (pc=%#lx)\n",
 				       ssw & RW ? "read" : "write",
 				       fp->un.fmtb.daddr,
 				       space_names[ssw & DFC], fp->ptregs.pc);
@@ -587,9 +563,10 @@ static inline void bus_error030 (struct frame *fp)
 		} else if (!(mmusr & MMU_I)) {
 			/* probably a 020 cas fault */
 			if (!(ssw & RM) && send_fault_sig(&fp->ptregs) > 0)
-				printk("unexpected bus error (%#x,%#x)\n", ssw, mmusr);
+				pr_err("unexpected bus error (%#x,%#x)\n", ssw,
+				       mmusr);
 		} else if (mmusr & (MMU_B|MMU_L|MMU_S)) {
-			printk("invalid %s access at %#lx from pc %#lx\n",
+			pr_err("invalid %s access at %#lx from pc %#lx\n",
 			       !(ssw & RW) ? "write" : "read", addr,
 			       fp->ptregs.pc);
 			die_if_kernel("Oops",&fp->ptregs,mmusr);
@@ -600,7 +577,7 @@ static inline void bus_error030 (struct frame *fp)
 			static volatile long tlong;
 #endif
 
-			printk("weird %s access at %#lx from pc %#lx (ssw is %#x)\n",
+			pr_err("weird %s access at %#lx from pc %#lx (ssw is %#x)\n",
 			       !(ssw & RW) ? "write" : "read", addr,
 			       fp->ptregs.pc, ssw);
 			asm volatile ("ptestr #1,%1@,#0\n\t"
@@ -609,18 +586,16 @@ static inline void bus_error030 (struct frame *fp)
 				      : "a" (addr));
 			mmusr = temp;
 
-			printk ("level 0 mmusr is %#x\n", mmusr);
+			pr_err("level 0 mmusr is %#x\n", mmusr);
 #if 0
 			asm volatile ("pmove %%tt0,%0"
 				      : "=m" (tlong));
-			printk("tt0 is %#lx, ", tlong);
+			pr_debug("tt0 is %#lx, ", tlong);
 			asm volatile ("pmove %%tt1,%0"
 				      : "=m" (tlong));
-			printk("tt1 is %#lx\n", tlong);
-#endif
-#ifdef DEBUG
-			printk("Unknown SIGSEGV - 1\n");
+			pr_debug("tt1 is %#lx\n", tlong);
 #endif
+			pr_debug("Unknown SIGSEGV - 1\n");
 			die_if_kernel("Oops",&fp->ptregs,mmusr);
 			force_sig(SIGSEGV, current);
 			return;
@@ -641,10 +616,9 @@ static inline void bus_error030 (struct frame *fp)
 		return;
 
 	if (fp->ptregs.sr & PS_S) {
-		printk("Instruction fault at %#010lx\n",
-			fp->ptregs.pc);
+		pr_err("Instruction fault at %#010lx\n", fp->ptregs.pc);
 	buserr:
-		printk ("BAD KERNEL BUSERR\n");
+		pr_err("BAD KERNEL BUSERR\n");
 		die_if_kernel("Oops",&fp->ptregs,0);
 		force_sig(SIGKILL, current);
 		return;
@@ -668,28 +642,22 @@ static inline void bus_error030 (struct frame *fp)
 		      "pmove %%psr,%1"
 		      : "=a&" (desc), "=m" (temp)
 		      : "a" (addr));
+	pr_debug("mmusr is %#x for addr %#lx in task %p\n",
+		temp, addr, current);
+	pr_debug("descriptor address is 0x%p, contents %#lx\n",
+		__va(desc), *(unsigned long *)__va(desc));
 #else
 	asm volatile ("ptestr #1,%1@,#7\n\t"
 		      "pmove %%psr,%0"
 		      : "=m" (temp) : "a" (addr));
 #endif
 	mmusr = temp;
-
-#ifdef DEBUG
-	printk ("mmusr is %#x for addr %#lx in task %p\n",
-		mmusr, addr, current);
-	printk ("descriptor address is %#lx, contents %#lx\n",
-		__va(desc), *(unsigned long *)__va(desc));
-#endif
-
 	if (mmusr & MMU_I)
 		do_page_fault (&fp->ptregs, addr, 0);
 	else if (mmusr & (MMU_B|MMU_L|MMU_S)) {
-		printk ("invalid insn access at %#lx from pc %#lx\n",
+		pr_err("invalid insn access at %#lx from pc %#lx\n",
 			addr, fp->ptregs.pc);
-#ifdef DEBUG
-		printk("Unknown SIGSEGV - 2\n");
-#endif
+		pr_debug("Unknown SIGSEGV - 2\n");
 		die_if_kernel("Oops",&fp->ptregs,mmusr);
 		force_sig(SIGSEGV, current);
 		return;
@@ -791,9 +759,7 @@ asmlinkage void buserr_c(struct frame *fp)
 	if (user_mode(&fp->ptregs))
 		current->thread.esp0 = (unsigned long) fp;
 
-#ifdef DEBUG
-	printk ("*** Bus Error *** Format is %x\n", fp->ptregs.format);
-#endif
+	pr_debug("*** Bus Error *** Format is %x\n", fp->ptregs.format);
 
 #if defined(CONFIG_COLDFIRE) && defined(CONFIG_MMU)
 	if (CPU_IS_COLDFIRE) {
@@ -836,9 +802,7 @@ asmlinkage void buserr_c(struct frame *fp)
 #endif
 	default:
 	  die_if_kernel("bad frame format",&fp->ptregs,0);
-#ifdef DEBUG
-	  printk("Unknown SIGSEGV - 4\n");
-#endif
+	  pr_debug("Unknown SIGSEGV - 4\n");
 	  force_sig(SIGSEGV, current);
 	}
 }
@@ -852,7 +816,7 @@ void show_trace(unsigned long *stack)
 	unsigned long addr;
 	int i;
 
-	printk("Call Trace:");
+	pr_info("Call Trace:");
 	addr = (unsigned long)stack + THREAD_SIZE - 1;
 	endstack = (unsigned long *)(addr & -THREAD_SIZE);
 	i = 0;
@@ -869,13 +833,13 @@ void show_trace(unsigned long *stack)
 		if (__kernel_text_address(addr)) {
 #ifndef CONFIG_KALLSYMS
 			if (i % 5 == 0)
-				printk("\n       ");
+				pr_cont("\n       ");
 #endif
-			printk(" [<%08lx>] %pS\n", addr, (void *)addr);
+			pr_cont(" [<%08lx>] %pS\n", addr, (void *)addr);
 			i++;
 		}
 	}
-	printk("\n");
+	pr_cont("\n");
 }
 
 void show_registers(struct pt_regs *regs)
@@ -887,81 +851,87 @@ void show_registers(struct pt_regs *regs)
 	int i;
 
 	print_modules();
-	printk("PC: [<%08lx>] %pS\n", regs->pc, (void *)regs->pc);
-	printk("SR: %04x  SP: %p  a2: %08lx\n", regs->sr, regs, regs->a2);
-	printk("d0: %08lx    d1: %08lx    d2: %08lx    d3: %08lx\n",
+	pr_info("PC: [<%08lx>] %pS\n", regs->pc, (void *)regs->pc);
+	pr_info("SR: %04x  SP: %p  a2: %08lx\n", regs->sr, regs, regs->a2);
+	pr_info("d0: %08lx    d1: %08lx    d2: %08lx    d3: %08lx\n",
 	       regs->d0, regs->d1, regs->d2, regs->d3);
-	printk("d4: %08lx    d5: %08lx    a0: %08lx    a1: %08lx\n",
+	pr_info("d4: %08lx    d5: %08lx    a0: %08lx    a1: %08lx\n",
 	       regs->d4, regs->d5, regs->a0, regs->a1);
 
-	printk("Process %s (pid: %d, task=%p)\n",
+	pr_info("Process %s (pid: %d, task=%p)\n",
 		current->comm, task_pid_nr(current), current);
 	addr = (unsigned long)&fp->un;
-	printk("Frame format=%X ", regs->format);
+	pr_info("Frame format=%X ", regs->format);
 	switch (regs->format) {
 	case 0x2:
-		printk("instr addr=%08lx\n", fp->un.fmt2.iaddr);
+		pr_cont("instr addr=%08lx\n", fp->un.fmt2.iaddr);
 		addr += sizeof(fp->un.fmt2);
 		break;
 	case 0x3:
-		printk("eff addr=%08lx\n", fp->un.fmt3.effaddr);
+		pr_cont("eff addr=%08lx\n", fp->un.fmt3.effaddr);
 		addr += sizeof(fp->un.fmt3);
 		break;
 	case 0x4:
-		printk((CPU_IS_060 ? "fault addr=%08lx fslw=%08lx\n"
-			: "eff addr=%08lx pc=%08lx\n"),
-			fp->un.fmt4.effaddr, fp->un.fmt4.pc);
+		if (CPU_IS_060)
+			pr_cont("fault addr=%08lx fslw=%08lx\n",
+				fp->un.fmt4.effaddr, fp->un.fmt4.pc);
+		else
+			pr_cont("eff addr=%08lx pc=%08lx\n",
+				fp->un.fmt4.effaddr, fp->un.fmt4.pc);
 		addr += sizeof(fp->un.fmt4);
 		break;
 	case 0x7:
-		printk("eff addr=%08lx ssw=%04x faddr=%08lx\n",
+		pr_cont("eff addr=%08lx ssw=%04x faddr=%08lx\n",
 			fp->un.fmt7.effaddr, fp->un.fmt7.ssw, fp->un.fmt7.faddr);
-		printk("wb 1 stat/addr/data: %04x %08lx %08lx\n",
+		pr_info("wb 1 stat/addr/data: %04x %08lx %08lx\n",
 			fp->un.fmt7.wb1s, fp->un.fmt7.wb1a, fp->un.fmt7.wb1dpd0);
-		printk("wb 2 stat/addr/data: %04x %08lx %08lx\n",
+		pr_info("wb 2 stat/addr/data: %04x %08lx %08lx\n",
 			fp->un.fmt7.wb2s, fp->un.fmt7.wb2a, fp->un.fmt7.wb2d);
-		printk("wb 3 stat/addr/data: %04x %08lx %08lx\n",
+		pr_info("wb 3 stat/addr/data: %04x %08lx %08lx\n",
 			fp->un.fmt7.wb3s, fp->un.fmt7.wb3a, fp->un.fmt7.wb3d);
-		printk("push data: %08lx %08lx %08lx %08lx\n",
+		pr_info("push data: %08lx %08lx %08lx %08lx\n",
 			fp->un.fmt7.wb1dpd0, fp->un.fmt7.pd1, fp->un.fmt7.pd2,
 			fp->un.fmt7.pd3);
 		addr += sizeof(fp->un.fmt7);
 		break;
 	case 0x9:
-		printk("instr addr=%08lx\n", fp->un.fmt9.iaddr);
+		pr_cont("instr addr=%08lx\n", fp->un.fmt9.iaddr);
 		addr += sizeof(fp->un.fmt9);
 		break;
 	case 0xa:
-		printk("ssw=%04x isc=%04x isb=%04x daddr=%08lx dobuf=%08lx\n",
+		pr_cont("ssw=%04x isc=%04x isb=%04x daddr=%08lx dobuf=%08lx\n",
 			fp->un.fmta.ssw, fp->un.fmta.isc, fp->un.fmta.isb,
 			fp->un.fmta.daddr, fp->un.fmta.dobuf);
 		addr += sizeof(fp->un.fmta);
 		break;
 	case 0xb:
-		printk("ssw=%04x isc=%04x isb=%04x daddr=%08lx dobuf=%08lx\n",
+		pr_cont("ssw=%04x isc=%04x isb=%04x daddr=%08lx dobuf=%08lx\n",
 			fp->un.fmtb.ssw, fp->un.fmtb.isc, fp->un.fmtb.isb,
 			fp->un.fmtb.daddr, fp->un.fmtb.dobuf);
-		printk("baddr=%08lx dibuf=%08lx ver=%x\n",
+		pr_info("baddr=%08lx dibuf=%08lx ver=%x\n",
 			fp->un.fmtb.baddr, fp->un.fmtb.dibuf, fp->un.fmtb.ver);
 		addr += sizeof(fp->un.fmtb);
 		break;
 	default:
-		printk("\n");
+		pr_cont("\n");
 	}
 	show_stack(NULL, (unsigned long *)addr);
 
-	printk("Code:");
+	pr_info("Code:");
 	set_fs(KERNEL_DS);
 	cp = (u16 *)regs->pc;
 	for (i = -8; i < 16; i++) {
 		if (get_user(c, cp + i) && i >= 0) {
-			printk(" Bad PC value.");
+			pr_cont(" Bad PC value.");
 			break;
 		}
-		printk(i ? " %04x" : " <%04x>", c);
+		if (i)
+			pr_cont(" %04x", c);
+		else
+			pr_cont(" <%04x>", c);
 	}
 	set_fs(old_fs);
-	printk ("\n");
+	pr_cont("\n");
 }
 
 void show_stack(struct task_struct *task, unsigned long *stack)
@@ -978,16 +948,16 @@ void show_stack(struct task_struct *task, unsigned long *stack)
 	}
 	endstack = (unsigned long *)(((unsigned long)stack + THREAD_SIZE - 1) & -THREAD_SIZE);
 
-	printk("Stack from %08lx:", (unsigned long)stack);
+	pr_info("Stack from %08lx:", (unsigned long)stack);
 	p = stack;
 	for (i = 0; i < kstack_depth_to_print; i++) {
 		if (p + 1 > endstack)
 			break;
 		if (i % 8 == 0)
-			printk("\n       ");
-		printk(" %08lx", *p++);
+			pr_cont("\n       ");
+		pr_cont(" %08lx", *p++);
 	}
-	printk("\n");
+	pr_cont("\n");
 	show_trace(stack);
 }
 
@@ -1005,32 +975,32 @@ void bad_super_trap (struct frame *fp)
 
 	console_verbose();
 	if (vector < ARRAY_SIZE(vec_names))
-		printk ("*** %s ***   FORMAT=%X\n",
+		pr_err("*** %s ***   FORMAT=%X\n",
 			vec_names[vector],
 			fp->ptregs.format);
 	else
-		printk ("*** Exception %d ***   FORMAT=%X\n",
+		pr_err("*** Exception %d ***   FORMAT=%X\n",
 			vector, fp->ptregs.format);
 	if (vector == VEC_ADDRERR && CPU_IS_020_OR_030) {
 		unsigned short ssw = fp->un.fmtb.ssw;
 
-		printk ("SSW=%#06x  ", ssw);
+		pr_err("SSW=%#06x  ", ssw);
 
 		if (ssw & RC)
-			printk ("Pipe stage C instruction fault at %#010lx\n",
+			pr_err("Pipe stage C instruction fault at %#010lx\n",
 				(fp->ptregs.format) == 0xA ?
 				fp->ptregs.pc + 2 : fp->un.fmtb.baddr - 2);
 		if (ssw & RB)
-			printk ("Pipe stage B instruction fault at %#010lx\n",
+			pr_err("Pipe stage B instruction fault at %#010lx\n",
 				(fp->ptregs.format) == 0xA ?
 				fp->ptregs.pc + 4 : fp->un.fmtb.baddr);
 		if (ssw & DF)
-			printk ("Data %s fault at %#010lx in %s (pc=%#lx)\n",
+			pr_err("Data %s fault at %#010lx in %s (pc=%#lx)\n",
 				ssw & RW ? "read" : "write",
 				fp->un.fmtb.daddr, space_names[ssw & DFC],
 				fp->ptregs.pc);
 	}
-	printk ("Current process id is %d\n", task_pid_nr(current));
+	pr_err("Current process id is %d\n", task_pid_nr(current));
 	die_if_kernel("BAD KERNEL TRAP", &fp->ptregs, 0);
 }
 
@@ -1162,7 +1132,7 @@ void die_if_kernel (char *str, struct pt_regs *fp, int nr)
 		return;
 
 	console_verbose();
-	printk("%s: %08x\n",str,nr);
+	pr_crit("%s: %08x\n", str, nr);
 	show_registers(fp);
 	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
 	do_exit(SIGSEGV);
-- 
cgit v1.2.3


From f6fc30dbb720e34e4ae796857b479d0e0af7cfb4 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Tue, 17 Dec 2013 11:24:58 +0100
Subject: m68k/mm: kmap spelling/grammar fixes

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/mm/kmap.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/mm/kmap.c b/arch/m68k/mm/kmap.c
index 568cfad3ceb8..6e4955bc542b 100644
--- a/arch/m68k/mm/kmap.c
+++ b/arch/m68k/mm/kmap.c
@@ -27,9 +27,9 @@
 
 /*
  * For 040/060 we can use the virtual memory area like other architectures,
- * but for 020/030 we want to use early termination page descriptor and we
+ * but for 020/030 we want to use early termination page descriptors and we
  * can't mix this with normal page descriptors, so we have to copy that code
- * (mm/vmalloc.c) and return appriorate aligned addresses.
+ * (mm/vmalloc.c) and return appropriately aligned addresses.
  */
 
 #ifdef CPU_M68040_OR_M68060_ONLY
@@ -224,7 +224,7 @@ void __iomem *__ioremap(unsigned long physaddr, unsigned long size, int cachefla
 EXPORT_SYMBOL(__ioremap);
 
 /*
- * Unmap a ioremap()ed region again
+ * Unmap an ioremap()ed region again
  */
 void iounmap(void __iomem *addr)
 {
@@ -241,8 +241,8 @@ EXPORT_SYMBOL(iounmap);
 
 /*
  * __iounmap unmaps nearly everything, so be careful
- * it doesn't free currently pointer/page tables anymore but it
- * wans't used anyway and might be added later.
+ * Currently it doesn't free pointer/page tables anymore but this
+ * wasn't used anyway and might be added later.
  */
 void __iounmap(void *addr, unsigned long size)
 {
-- 
cgit v1.2.3


From bbb519cd5ca9d84a96f7ea9a5826f57091eade02 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Sun, 18 Aug 2013 12:00:50 +0200
Subject: m68k/defconfig: Enable EARLY_PRINTK

It's too valuable for debugging to be disabled.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/configs/mac_defconfig     | 1 +
 arch/m68k/configs/multi_defconfig   | 1 +
 arch/m68k/configs/mvme16x_defconfig | 1 +
 3 files changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 7f6be05090b9..2d1e468bece2 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -421,6 +421,7 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 265abf4b3ae5..f83b2c4a3d68 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -499,6 +499,7 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 4108303e2453..9cb7e4da8e84 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -388,6 +388,7 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
-- 
cgit v1.2.3


From a16f9a42a557dcf33eaee4c5ba3e09eb7e655c70 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Fri, 17 May 2013 17:25:39 +0200
Subject: m68k: Update defconfigs for v3.13-rc1

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/configs/amiga_defconfig    | 43 ++++++++++++++++++++++++++++++----
 arch/m68k/configs/apollo_defconfig   | 42 +++++++++++++++++++++++++++++----
 arch/m68k/configs/atari_defconfig    | 43 ++++++++++++++++++++++++++++++----
 arch/m68k/configs/bvme6000_defconfig | 41 +++++++++++++++++++++++++++++---
 arch/m68k/configs/hp300_defconfig    | 42 +++++++++++++++++++++++++++++----
 arch/m68k/configs/mac_defconfig      | 43 ++++++++++++++++++++++++++++++----
 arch/m68k/configs/multi_defconfig    | 45 ++++++++++++++++++++++++++++++------
 arch/m68k/configs/mvme147_defconfig  | 41 +++++++++++++++++++++++++++++---
 arch/m68k/configs/mvme16x_defconfig  | 41 +++++++++++++++++++++++++++++---
 arch/m68k/configs/q40_defconfig      | 43 ++++++++++++++++++++++++++++++----
 arch/m68k/configs/sun3_defconfig     | 42 +++++++++++++++++++++++++++++----
 arch/m68k/configs/sun3x_defconfig    | 42 +++++++++++++++++++++++++++++----
 12 files changed, 456 insertions(+), 52 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 1b9531750826..559ff3af8ff7 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -52,7 +52,6 @@ CONFIG_IP_PNP_RARP=y
 CONFIG_NET_IPIP=m
 CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
-CONFIG_SYN_COOKIES=y
 CONFIG_NET_IPVTI=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
@@ -63,11 +62,11 @@ CONFIG_INET_XFRM_MODE_BEET=m
 # CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=m
 CONFIG_INET_UDP_DIAG=m
-CONFIG_IPV6_PRIVACY=y
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
 CONFIG_NF_CONNTRACK=m
@@ -85,6 +84,17 @@ CONFIG_NF_CONNTRACK_PPTP=m
 CONFIG_NF_CONNTRACK_SANE=m
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_TABLES=m
+CONFIG_NFT_EXTHDR=m
+CONFIG_NFT_META=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_RBTREE=m
+CONFIG_NFT_HASH=m
+CONFIG_NFT_COUNTER=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_COMPAT=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -98,6 +108,7 @@ CONFIG_NETFILTER_XT_TARGET_NFLOG=m
 CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
 CONFIG_NETFILTER_XT_TARGET_NOTRACK=m
 CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
 CONFIG_NETFILTER_XT_TARGET_TRACE=m
 CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
 CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
@@ -130,6 +141,7 @@ CONFIG_NETFILTER_XT_MATCH_QUOTA=m
 CONFIG_NETFILTER_XT_MATCH_RATEEST=m
 CONFIG_NETFILTER_XT_MATCH_REALM=m
 CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
 CONFIG_NETFILTER_XT_MATCH_STATE=m
 CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
 CONFIG_NETFILTER_XT_MATCH_STRING=m
@@ -144,11 +156,18 @@ CONFIG_IP_SET_HASH_IP=m
 CONFIG_IP_SET_HASH_IPPORT=m
 CONFIG_IP_SET_HASH_IPPORTIP=m
 CONFIG_IP_SET_HASH_IPPORTNET=m
+CONFIG_IP_SET_HASH_NETPORTNET=m
 CONFIG_IP_SET_HASH_NET=m
+CONFIG_IP_SET_HASH_NETNET=m
 CONFIG_IP_SET_HASH_NETPORT=m
 CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
+CONFIG_NF_TABLES_IPV4=m
+CONFIG_NFT_REJECT_IPV4=m
+CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_NF_TABLES_ARP=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -156,6 +175,7 @@ CONFIG_IP_NF_MATCH_RPFILTER=m
 CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_SYNPROXY=m
 CONFIG_IP_NF_TARGET_ULOG=m
 CONFIG_NF_NAT_IPV4=m
 CONFIG_IP_NF_TARGET_MASQUERADE=m
@@ -170,6 +190,9 @@ CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_NF_TABLES_IPV6=m
+CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -183,11 +206,13 @@ CONFIG_IP6_NF_MATCH_RT=m
 CONFIG_IP6_NF_TARGET_HL=m
 CONFIG_IP6_NF_FILTER=m
 CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_TARGET_SYNPROXY=m
 CONFIG_IP6_NF_MANGLE=m
 CONFIG_IP6_NF_RAW=m
 CONFIG_NF_NAT_IPV6=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
+CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_IP_DCCP=m
 # CONFIG_IP_DCCP_CCID3 is not set
 CONFIG_SCTP_COOKIE_HMAC_SHA1=y
@@ -195,8 +220,12 @@ CONFIG_RDS=m
 CONFIG_RDS_TCP=m
 CONFIG_L2TP=m
 CONFIG_ATALK=m
+CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
+CONFIG_BATMAN_ADV_NC=y
+CONFIG_NETLINK_DIAG=m
+CONFIG_NET_MPLS_GSO=m
 # CONFIG_WIRELESS is not set
 CONFIG_DEVTMPFS=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -215,6 +244,7 @@ CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
+CONFIG_DUMMY_IRQ=m
 CONFIG_IDE=y
 CONFIG_IDE_GD_ATAPI=y
 CONFIG_BLK_DEV_IDECD=y
@@ -261,6 +291,7 @@ CONFIG_EQUALIZER=m
 CONFIG_NET_TEAM=m
 CONFIG_NET_TEAM_MODE_BROADCAST=m
 CONFIG_NET_TEAM_MODE_ROUNDROBIN=m
+CONFIG_NET_TEAM_MODE_RANDOM=m
 CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=m
 CONFIG_NET_TEAM_MODE_LOADBALANCE=m
 CONFIG_VXLAN=m
@@ -270,10 +301,10 @@ CONFIG_VETH=m
 # CONFIG_NET_VENDOR_3COM is not set
 CONFIG_A2065=y
 CONFIG_ARIADNE=y
+# CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
 # CONFIG_NET_VENDOR_CIRRUS is not set
-# CONFIG_NET_VENDOR_FUJITSU is not set
 # CONFIG_NET_VENDOR_HP is not set
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
@@ -284,6 +315,7 @@ CONFIG_ZORRO8390=y
 # CONFIG_NET_VENDOR_SEEQ is not set
 # CONFIG_NET_VENDOR_SMSC is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
@@ -310,7 +342,6 @@ CONFIG_JOYSTICK_AMIGA=m
 CONFIG_INPUT_MISC=y
 CONFIG_INPUT_M68K_BEEP=m
 # CONFIG_SERIO is not set
-CONFIG_VT_HW_CONSOLE_BINDING=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_DEVKMEM is not set
 CONFIG_PRINTER=m
@@ -439,10 +470,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_STRING_HELPERS=m
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
-CONFIG_CRYPTO_NULL=m
 CONFIG_CRYPTO_CRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
@@ -475,6 +506,8 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index bc3b9aef51f4..cb1f55df69b6 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -50,7 +50,6 @@ CONFIG_IP_PNP_RARP=y
 CONFIG_NET_IPIP=m
 CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
-CONFIG_SYN_COOKIES=y
 CONFIG_NET_IPVTI=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
@@ -61,11 +60,11 @@ CONFIG_INET_XFRM_MODE_BEET=m
 # CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=m
 CONFIG_INET_UDP_DIAG=m
-CONFIG_IPV6_PRIVACY=y
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
 CONFIG_NF_CONNTRACK=m
@@ -83,6 +82,17 @@ CONFIG_NF_CONNTRACK_PPTP=m
 CONFIG_NF_CONNTRACK_SANE=m
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_TABLES=m
+CONFIG_NFT_EXTHDR=m
+CONFIG_NFT_META=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_RBTREE=m
+CONFIG_NFT_HASH=m
+CONFIG_NFT_COUNTER=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_COMPAT=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -96,6 +106,7 @@ CONFIG_NETFILTER_XT_TARGET_NFLOG=m
 CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
 CONFIG_NETFILTER_XT_TARGET_NOTRACK=m
 CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
 CONFIG_NETFILTER_XT_TARGET_TRACE=m
 CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
 CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
@@ -128,6 +139,7 @@ CONFIG_NETFILTER_XT_MATCH_QUOTA=m
 CONFIG_NETFILTER_XT_MATCH_RATEEST=m
 CONFIG_NETFILTER_XT_MATCH_REALM=m
 CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
 CONFIG_NETFILTER_XT_MATCH_STATE=m
 CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
 CONFIG_NETFILTER_XT_MATCH_STRING=m
@@ -142,11 +154,18 @@ CONFIG_IP_SET_HASH_IP=m
 CONFIG_IP_SET_HASH_IPPORT=m
 CONFIG_IP_SET_HASH_IPPORTIP=m
 CONFIG_IP_SET_HASH_IPPORTNET=m
+CONFIG_IP_SET_HASH_NETPORTNET=m
 CONFIG_IP_SET_HASH_NET=m
+CONFIG_IP_SET_HASH_NETNET=m
 CONFIG_IP_SET_HASH_NETPORT=m
 CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
+CONFIG_NF_TABLES_IPV4=m
+CONFIG_NFT_REJECT_IPV4=m
+CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_NF_TABLES_ARP=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -154,6 +173,7 @@ CONFIG_IP_NF_MATCH_RPFILTER=m
 CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_SYNPROXY=m
 CONFIG_IP_NF_TARGET_ULOG=m
 CONFIG_NF_NAT_IPV4=m
 CONFIG_IP_NF_TARGET_MASQUERADE=m
@@ -168,6 +188,9 @@ CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_NF_TABLES_IPV6=m
+CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -181,11 +204,13 @@ CONFIG_IP6_NF_MATCH_RT=m
 CONFIG_IP6_NF_TARGET_HL=m
 CONFIG_IP6_NF_FILTER=m
 CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_TARGET_SYNPROXY=m
 CONFIG_IP6_NF_MANGLE=m
 CONFIG_IP6_NF_RAW=m
 CONFIG_NF_NAT_IPV6=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
+CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_IP_DCCP=m
 # CONFIG_IP_DCCP_CCID3 is not set
 CONFIG_SCTP_COOKIE_HMAC_SHA1=y
@@ -193,8 +218,12 @@ CONFIG_RDS=m
 CONFIG_RDS_TCP=m
 CONFIG_L2TP=m
 CONFIG_ATALK=m
+CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
+CONFIG_BATMAN_ADV_NC=y
+CONFIG_NETLINK_DIAG=m
+CONFIG_NET_MPLS_GSO=m
 # CONFIG_WIRELESS is not set
 CONFIG_DEVTMPFS=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -207,6 +236,7 @@ CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
+CONFIG_DUMMY_IRQ=m
 CONFIG_RAID_ATTRS=m
 CONFIG_SCSI=y
 CONFIG_SCSI_TGT=m
@@ -243,12 +273,14 @@ CONFIG_EQUALIZER=m
 CONFIG_NET_TEAM=m
 CONFIG_NET_TEAM_MODE_BROADCAST=m
 CONFIG_NET_TEAM_MODE_ROUNDROBIN=m
+CONFIG_NET_TEAM_MODE_RANDOM=m
 CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=m
 CONFIG_NET_TEAM_MODE_LOADBALANCE=m
 CONFIG_VXLAN=m
 CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
+# CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
 # CONFIG_NET_VENDOR_INTEL is not set
@@ -257,6 +289,7 @@ CONFIG_VETH=m
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
@@ -278,7 +311,6 @@ CONFIG_INPUT_EVDEV=m
 # CONFIG_MOUSE_PS2 is not set
 CONFIG_MOUSE_SERIAL=m
 CONFIG_SERIO=m
-CONFIG_VT_HW_CONSOLE_BINDING=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_DEVKMEM is not set
 # CONFIG_HW_RANDOM is not set
@@ -396,10 +428,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_STRING_HELPERS=m
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
-CONFIG_CRYPTO_NULL=m
 CONFIG_CRYPTO_CRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
@@ -432,6 +464,8 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 264c84447075..e880cfbb62d9 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -49,7 +49,6 @@ CONFIG_IP_PNP_RARP=y
 CONFIG_NET_IPIP=m
 CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
-CONFIG_SYN_COOKIES=y
 CONFIG_NET_IPVTI=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
@@ -60,11 +59,11 @@ CONFIG_INET_XFRM_MODE_BEET=m
 # CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=m
 CONFIG_INET_UDP_DIAG=m
-CONFIG_IPV6_PRIVACY=y
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
 CONFIG_NF_CONNTRACK=m
@@ -82,6 +81,17 @@ CONFIG_NF_CONNTRACK_PPTP=m
 CONFIG_NF_CONNTRACK_SANE=m
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_TABLES=m
+CONFIG_NFT_EXTHDR=m
+CONFIG_NFT_META=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_RBTREE=m
+CONFIG_NFT_HASH=m
+CONFIG_NFT_COUNTER=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_COMPAT=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -95,6 +105,7 @@ CONFIG_NETFILTER_XT_TARGET_NFLOG=m
 CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
 CONFIG_NETFILTER_XT_TARGET_NOTRACK=m
 CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
 CONFIG_NETFILTER_XT_TARGET_TRACE=m
 CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
 CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
@@ -127,6 +138,7 @@ CONFIG_NETFILTER_XT_MATCH_QUOTA=m
 CONFIG_NETFILTER_XT_MATCH_RATEEST=m
 CONFIG_NETFILTER_XT_MATCH_REALM=m
 CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
 CONFIG_NETFILTER_XT_MATCH_STATE=m
 CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
 CONFIG_NETFILTER_XT_MATCH_STRING=m
@@ -141,11 +153,18 @@ CONFIG_IP_SET_HASH_IP=m
 CONFIG_IP_SET_HASH_IPPORT=m
 CONFIG_IP_SET_HASH_IPPORTIP=m
 CONFIG_IP_SET_HASH_IPPORTNET=m
+CONFIG_IP_SET_HASH_NETPORTNET=m
 CONFIG_IP_SET_HASH_NET=m
+CONFIG_IP_SET_HASH_NETNET=m
 CONFIG_IP_SET_HASH_NETPORT=m
 CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
+CONFIG_NF_TABLES_IPV4=m
+CONFIG_NFT_REJECT_IPV4=m
+CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_NF_TABLES_ARP=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -153,6 +172,7 @@ CONFIG_IP_NF_MATCH_RPFILTER=m
 CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_SYNPROXY=m
 CONFIG_IP_NF_TARGET_ULOG=m
 CONFIG_NF_NAT_IPV4=m
 CONFIG_IP_NF_TARGET_MASQUERADE=m
@@ -167,6 +187,9 @@ CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_NF_TABLES_IPV6=m
+CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -180,11 +203,13 @@ CONFIG_IP6_NF_MATCH_RT=m
 CONFIG_IP6_NF_TARGET_HL=m
 CONFIG_IP6_NF_FILTER=m
 CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_TARGET_SYNPROXY=m
 CONFIG_IP6_NF_MANGLE=m
 CONFIG_IP6_NF_RAW=m
 CONFIG_NF_NAT_IPV6=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
+CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_IP_DCCP=m
 # CONFIG_IP_DCCP_CCID3 is not set
 CONFIG_SCTP_COOKIE_HMAC_SHA1=y
@@ -192,8 +217,12 @@ CONFIG_RDS=m
 CONFIG_RDS_TCP=m
 CONFIG_L2TP=m
 CONFIG_ATALK=m
+CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
+CONFIG_BATMAN_ADV_NC=y
+CONFIG_NETLINK_DIAG=m
+CONFIG_NET_MPLS_GSO=m
 # CONFIG_WIRELESS is not set
 CONFIG_DEVTMPFS=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -210,6 +239,7 @@ CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
+CONFIG_DUMMY_IRQ=m
 CONFIG_IDE=y
 CONFIG_IDE_GD_ATAPI=y
 CONFIG_BLK_DEV_IDECD=y
@@ -248,10 +278,10 @@ CONFIG_TCM_PSCSI=m
 CONFIG_NETDEVICES=y
 CONFIG_DUMMY=m
 CONFIG_EQUALIZER=m
-CONFIG_MII=y
 CONFIG_NET_TEAM=m
 CONFIG_NET_TEAM_MODE_BROADCAST=m
 CONFIG_NET_TEAM_MODE_ROUNDROBIN=m
+CONFIG_NET_TEAM_MODE_RANDOM=m
 CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=m
 CONFIG_NET_TEAM_MODE_LOADBALANCE=m
 CONFIG_VXLAN=m
@@ -259,6 +289,7 @@ CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
 CONFIG_ATARILANCE=y
+# CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
 # CONFIG_NET_VENDOR_INTEL is not set
@@ -266,6 +297,7 @@ CONFIG_ATARILANCE=y
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
@@ -290,7 +322,6 @@ CONFIG_MOUSE_ATARI=m
 CONFIG_INPUT_MISC=y
 CONFIG_INPUT_M68K_BEEP=m
 # CONFIG_SERIO is not set
-CONFIG_VT_HW_CONSOLE_BINDING=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_DEVKMEM is not set
 CONFIG_PRINTER=m
@@ -414,10 +445,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_STRING_HELPERS=m
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
-CONFIG_CRYPTO_NULL=m
 CONFIG_CRYPTO_CRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
@@ -450,6 +481,8 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index 2462c9325b1f..4aa4f45e52a8 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -48,7 +48,6 @@ CONFIG_IP_PNP_RARP=y
 CONFIG_NET_IPIP=m
 CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
-CONFIG_SYN_COOKIES=y
 CONFIG_NET_IPVTI=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
@@ -59,11 +58,11 @@ CONFIG_INET_XFRM_MODE_BEET=m
 # CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=m
 CONFIG_INET_UDP_DIAG=m
-CONFIG_IPV6_PRIVACY=y
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
 CONFIG_NF_CONNTRACK=m
@@ -81,6 +80,17 @@ CONFIG_NF_CONNTRACK_PPTP=m
 CONFIG_NF_CONNTRACK_SANE=m
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_TABLES=m
+CONFIG_NFT_EXTHDR=m
+CONFIG_NFT_META=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_RBTREE=m
+CONFIG_NFT_HASH=m
+CONFIG_NFT_COUNTER=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_COMPAT=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -94,6 +104,7 @@ CONFIG_NETFILTER_XT_TARGET_NFLOG=m
 CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
 CONFIG_NETFILTER_XT_TARGET_NOTRACK=m
 CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
 CONFIG_NETFILTER_XT_TARGET_TRACE=m
 CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
 CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
@@ -126,6 +137,7 @@ CONFIG_NETFILTER_XT_MATCH_QUOTA=m
 CONFIG_NETFILTER_XT_MATCH_RATEEST=m
 CONFIG_NETFILTER_XT_MATCH_REALM=m
 CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
 CONFIG_NETFILTER_XT_MATCH_STATE=m
 CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
 CONFIG_NETFILTER_XT_MATCH_STRING=m
@@ -140,11 +152,18 @@ CONFIG_IP_SET_HASH_IP=m
 CONFIG_IP_SET_HASH_IPPORT=m
 CONFIG_IP_SET_HASH_IPPORTIP=m
 CONFIG_IP_SET_HASH_IPPORTNET=m
+CONFIG_IP_SET_HASH_NETPORTNET=m
 CONFIG_IP_SET_HASH_NET=m
+CONFIG_IP_SET_HASH_NETNET=m
 CONFIG_IP_SET_HASH_NETPORT=m
 CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
+CONFIG_NF_TABLES_IPV4=m
+CONFIG_NFT_REJECT_IPV4=m
+CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_NF_TABLES_ARP=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -152,6 +171,7 @@ CONFIG_IP_NF_MATCH_RPFILTER=m
 CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_SYNPROXY=m
 CONFIG_IP_NF_TARGET_ULOG=m
 CONFIG_NF_NAT_IPV4=m
 CONFIG_IP_NF_TARGET_MASQUERADE=m
@@ -166,6 +186,9 @@ CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_NF_TABLES_IPV6=m
+CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -179,11 +202,13 @@ CONFIG_IP6_NF_MATCH_RT=m
 CONFIG_IP6_NF_TARGET_HL=m
 CONFIG_IP6_NF_FILTER=m
 CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_TARGET_SYNPROXY=m
 CONFIG_IP6_NF_MANGLE=m
 CONFIG_IP6_NF_RAW=m
 CONFIG_NF_NAT_IPV6=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
+CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_IP_DCCP=m
 # CONFIG_IP_DCCP_CCID3 is not set
 CONFIG_SCTP_COOKIE_HMAC_SHA1=y
@@ -191,8 +216,12 @@ CONFIG_RDS=m
 CONFIG_RDS_TCP=m
 CONFIG_L2TP=m
 CONFIG_ATALK=m
+CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
+CONFIG_BATMAN_ADV_NC=y
+CONFIG_NETLINK_DIAG=m
+CONFIG_NET_MPLS_GSO=m
 # CONFIG_WIRELESS is not set
 CONFIG_DEVTMPFS=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -205,6 +234,7 @@ CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
+CONFIG_DUMMY_IRQ=m
 CONFIG_RAID_ATTRS=m
 CONFIG_SCSI=y
 CONFIG_SCSI_TGT=m
@@ -242,12 +272,14 @@ CONFIG_EQUALIZER=m
 CONFIG_NET_TEAM=m
 CONFIG_NET_TEAM_MODE_BROADCAST=m
 CONFIG_NET_TEAM_MODE_ROUNDROBIN=m
+CONFIG_NET_TEAM_MODE_RANDOM=m
 CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=m
 CONFIG_NET_TEAM_MODE_LOADBALANCE=m
 CONFIG_VXLAN=m
 CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
+# CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
 CONFIG_BVME6000_NET=y
@@ -256,6 +288,7 @@ CONFIG_BVME6000_NET=y
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
@@ -388,10 +421,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_STRING_HELPERS=m
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
-CONFIG_CRYPTO_NULL=m
 CONFIG_CRYPTO_CRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
@@ -424,6 +457,8 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index 9e6d58204fbc..7cd9d9f456fb 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -50,7 +50,6 @@ CONFIG_IP_PNP_RARP=y
 CONFIG_NET_IPIP=m
 CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
-CONFIG_SYN_COOKIES=y
 CONFIG_NET_IPVTI=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
@@ -61,11 +60,11 @@ CONFIG_INET_XFRM_MODE_BEET=m
 # CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=m
 CONFIG_INET_UDP_DIAG=m
-CONFIG_IPV6_PRIVACY=y
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
 CONFIG_NF_CONNTRACK=m
@@ -83,6 +82,17 @@ CONFIG_NF_CONNTRACK_PPTP=m
 CONFIG_NF_CONNTRACK_SANE=m
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_TABLES=m
+CONFIG_NFT_EXTHDR=m
+CONFIG_NFT_META=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_RBTREE=m
+CONFIG_NFT_HASH=m
+CONFIG_NFT_COUNTER=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_COMPAT=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -96,6 +106,7 @@ CONFIG_NETFILTER_XT_TARGET_NFLOG=m
 CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
 CONFIG_NETFILTER_XT_TARGET_NOTRACK=m
 CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
 CONFIG_NETFILTER_XT_TARGET_TRACE=m
 CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
 CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
@@ -128,6 +139,7 @@ CONFIG_NETFILTER_XT_MATCH_QUOTA=m
 CONFIG_NETFILTER_XT_MATCH_RATEEST=m
 CONFIG_NETFILTER_XT_MATCH_REALM=m
 CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
 CONFIG_NETFILTER_XT_MATCH_STATE=m
 CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
 CONFIG_NETFILTER_XT_MATCH_STRING=m
@@ -142,11 +154,18 @@ CONFIG_IP_SET_HASH_IP=m
 CONFIG_IP_SET_HASH_IPPORT=m
 CONFIG_IP_SET_HASH_IPPORTIP=m
 CONFIG_IP_SET_HASH_IPPORTNET=m
+CONFIG_IP_SET_HASH_NETPORTNET=m
 CONFIG_IP_SET_HASH_NET=m
+CONFIG_IP_SET_HASH_NETNET=m
 CONFIG_IP_SET_HASH_NETPORT=m
 CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
+CONFIG_NF_TABLES_IPV4=m
+CONFIG_NFT_REJECT_IPV4=m
+CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_NF_TABLES_ARP=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -154,6 +173,7 @@ CONFIG_IP_NF_MATCH_RPFILTER=m
 CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_SYNPROXY=m
 CONFIG_IP_NF_TARGET_ULOG=m
 CONFIG_NF_NAT_IPV4=m
 CONFIG_IP_NF_TARGET_MASQUERADE=m
@@ -168,6 +188,9 @@ CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_NF_TABLES_IPV6=m
+CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -181,11 +204,13 @@ CONFIG_IP6_NF_MATCH_RT=m
 CONFIG_IP6_NF_TARGET_HL=m
 CONFIG_IP6_NF_FILTER=m
 CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_TARGET_SYNPROXY=m
 CONFIG_IP6_NF_MANGLE=m
 CONFIG_IP6_NF_RAW=m
 CONFIG_NF_NAT_IPV6=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
+CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_IP_DCCP=m
 # CONFIG_IP_DCCP_CCID3 is not set
 CONFIG_SCTP_COOKIE_HMAC_SHA1=y
@@ -193,8 +218,12 @@ CONFIG_RDS=m
 CONFIG_RDS_TCP=m
 CONFIG_L2TP=m
 CONFIG_ATALK=m
+CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
+CONFIG_BATMAN_ADV_NC=y
+CONFIG_NETLINK_DIAG=m
+CONFIG_NET_MPLS_GSO=m
 # CONFIG_WIRELESS is not set
 CONFIG_DEVTMPFS=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -207,6 +236,7 @@ CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
+CONFIG_DUMMY_IRQ=m
 CONFIG_RAID_ATTRS=m
 CONFIG_SCSI=y
 CONFIG_SCSI_TGT=m
@@ -243,6 +273,7 @@ CONFIG_EQUALIZER=m
 CONFIG_NET_TEAM=m
 CONFIG_NET_TEAM_MODE_BROADCAST=m
 CONFIG_NET_TEAM_MODE_ROUNDROBIN=m
+CONFIG_NET_TEAM_MODE_RANDOM=m
 CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=m
 CONFIG_NET_TEAM_MODE_LOADBALANCE=m
 CONFIG_VXLAN=m
@@ -250,6 +281,7 @@ CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
 CONFIG_HPLANCE=y
+# CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
 # CONFIG_NET_VENDOR_INTEL is not set
@@ -258,6 +290,7 @@ CONFIG_HPLANCE=y
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
@@ -281,7 +314,6 @@ CONFIG_MOUSE_SERIAL=m
 CONFIG_INPUT_MISC=y
 CONFIG_HP_SDC_RTC=m
 CONFIG_SERIO_SERPORT=m
-CONFIG_VT_HW_CONSOLE_BINDING=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_DEVKMEM is not set
 # CONFIG_HW_RANDOM is not set
@@ -398,10 +430,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_STRING_HELPERS=m
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
-CONFIG_CRYPTO_NULL=m
 CONFIG_CRYPTO_CRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
@@ -434,6 +466,8 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 2d1e468bece2..31f5bd061d14 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -49,7 +49,6 @@ CONFIG_IP_PNP_RARP=y
 CONFIG_NET_IPIP=m
 CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
-CONFIG_SYN_COOKIES=y
 CONFIG_NET_IPVTI=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
@@ -60,11 +59,11 @@ CONFIG_INET_XFRM_MODE_BEET=m
 # CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=m
 CONFIG_INET_UDP_DIAG=m
-CONFIG_IPV6_PRIVACY=y
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
 CONFIG_NF_CONNTRACK=m
@@ -82,6 +81,17 @@ CONFIG_NF_CONNTRACK_PPTP=m
 CONFIG_NF_CONNTRACK_SANE=m
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_TABLES=m
+CONFIG_NFT_EXTHDR=m
+CONFIG_NFT_META=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_RBTREE=m
+CONFIG_NFT_HASH=m
+CONFIG_NFT_COUNTER=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_COMPAT=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -95,6 +105,7 @@ CONFIG_NETFILTER_XT_TARGET_NFLOG=m
 CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
 CONFIG_NETFILTER_XT_TARGET_NOTRACK=m
 CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
 CONFIG_NETFILTER_XT_TARGET_TRACE=m
 CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
 CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
@@ -127,6 +138,7 @@ CONFIG_NETFILTER_XT_MATCH_QUOTA=m
 CONFIG_NETFILTER_XT_MATCH_RATEEST=m
 CONFIG_NETFILTER_XT_MATCH_REALM=m
 CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
 CONFIG_NETFILTER_XT_MATCH_STATE=m
 CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
 CONFIG_NETFILTER_XT_MATCH_STRING=m
@@ -141,11 +153,18 @@ CONFIG_IP_SET_HASH_IP=m
 CONFIG_IP_SET_HASH_IPPORT=m
 CONFIG_IP_SET_HASH_IPPORTIP=m
 CONFIG_IP_SET_HASH_IPPORTNET=m
+CONFIG_IP_SET_HASH_NETPORTNET=m
 CONFIG_IP_SET_HASH_NET=m
+CONFIG_IP_SET_HASH_NETNET=m
 CONFIG_IP_SET_HASH_NETPORT=m
 CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
+CONFIG_NF_TABLES_IPV4=m
+CONFIG_NFT_REJECT_IPV4=m
+CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_NF_TABLES_ARP=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -153,6 +172,7 @@ CONFIG_IP_NF_MATCH_RPFILTER=m
 CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_SYNPROXY=m
 CONFIG_IP_NF_TARGET_ULOG=m
 CONFIG_NF_NAT_IPV4=m
 CONFIG_IP_NF_TARGET_MASQUERADE=m
@@ -167,6 +187,9 @@ CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_NF_TABLES_IPV6=m
+CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -180,11 +203,13 @@ CONFIG_IP6_NF_MATCH_RT=m
 CONFIG_IP6_NF_TARGET_HL=m
 CONFIG_IP6_NF_FILTER=m
 CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_TARGET_SYNPROXY=m
 CONFIG_IP6_NF_MANGLE=m
 CONFIG_IP6_NF_RAW=m
 CONFIG_NF_NAT_IPV6=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
+CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_IP_DCCP=m
 # CONFIG_IP_DCCP_CCID3 is not set
 CONFIG_SCTP_COOKIE_HMAC_SHA1=y
@@ -195,9 +220,12 @@ CONFIG_ATALK=m
 CONFIG_DEV_APPLETALK=m
 CONFIG_IPDDP=m
 CONFIG_IPDDP_ENCAP=y
-CONFIG_IPDDP_DECAP=y
+CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
+CONFIG_BATMAN_ADV_NC=y
+CONFIG_NETLINK_DIAG=m
+CONFIG_NET_MPLS_GSO=m
 # CONFIG_WIRELESS is not set
 CONFIG_DEVTMPFS=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -211,6 +239,7 @@ CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
+CONFIG_DUMMY_IRQ=m
 CONFIG_IDE=y
 CONFIG_IDE_GD_ATAPI=y
 CONFIG_BLK_DEV_IDECD=y
@@ -260,6 +289,7 @@ CONFIG_EQUALIZER=m
 CONFIG_NET_TEAM=m
 CONFIG_NET_TEAM_MODE_BROADCAST=m
 CONFIG_NET_TEAM_MODE_ROUNDROBIN=m
+CONFIG_NET_TEAM_MODE_RANDOM=m
 CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=m
 CONFIG_NET_TEAM_MODE_LOADBALANCE=m
 CONFIG_VXLAN=m
@@ -267,6 +297,7 @@ CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
 CONFIG_MACMACE=y
+# CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
 CONFIG_MAC89x0=y
@@ -278,6 +309,7 @@ CONFIG_MAC8390=y
 # CONFIG_NET_VENDOR_SEEQ is not set
 # CONFIG_NET_VENDOR_SMSC is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
@@ -301,7 +333,6 @@ CONFIG_MOUSE_SERIAL=m
 CONFIG_INPUT_MISC=y
 CONFIG_INPUT_M68K_BEEP=m
 CONFIG_SERIO=m
-CONFIG_VT_HW_CONSOLE_BINDING=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_PMACZILOG=y
@@ -421,11 +452,11 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_STRING_HELPERS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
-CONFIG_CRYPTO_NULL=m
 CONFIG_CRYPTO_CRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
@@ -458,6 +489,8 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index f83b2c4a3d68..4e5adff326ee 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -58,7 +58,6 @@ CONFIG_IP_PNP_RARP=y
 CONFIG_NET_IPIP=m
 CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
-CONFIG_SYN_COOKIES=y
 CONFIG_NET_IPVTI=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
@@ -69,11 +68,11 @@ CONFIG_INET_XFRM_MODE_BEET=m
 # CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=m
 CONFIG_INET_UDP_DIAG=m
-CONFIG_IPV6_PRIVACY=y
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
 CONFIG_NF_CONNTRACK=m
@@ -91,6 +90,17 @@ CONFIG_NF_CONNTRACK_PPTP=m
 CONFIG_NF_CONNTRACK_SANE=m
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_TABLES=m
+CONFIG_NFT_EXTHDR=m
+CONFIG_NFT_META=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_RBTREE=m
+CONFIG_NFT_HASH=m
+CONFIG_NFT_COUNTER=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_COMPAT=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -104,6 +114,7 @@ CONFIG_NETFILTER_XT_TARGET_NFLOG=m
 CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
 CONFIG_NETFILTER_XT_TARGET_NOTRACK=m
 CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
 CONFIG_NETFILTER_XT_TARGET_TRACE=m
 CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
 CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
@@ -136,6 +147,7 @@ CONFIG_NETFILTER_XT_MATCH_QUOTA=m
 CONFIG_NETFILTER_XT_MATCH_RATEEST=m
 CONFIG_NETFILTER_XT_MATCH_REALM=m
 CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
 CONFIG_NETFILTER_XT_MATCH_STATE=m
 CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
 CONFIG_NETFILTER_XT_MATCH_STRING=m
@@ -150,11 +162,18 @@ CONFIG_IP_SET_HASH_IP=m
 CONFIG_IP_SET_HASH_IPPORT=m
 CONFIG_IP_SET_HASH_IPPORTIP=m
 CONFIG_IP_SET_HASH_IPPORTNET=m
+CONFIG_IP_SET_HASH_NETPORTNET=m
 CONFIG_IP_SET_HASH_NET=m
+CONFIG_IP_SET_HASH_NETNET=m
 CONFIG_IP_SET_HASH_NETPORT=m
 CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
+CONFIG_NF_TABLES_IPV4=m
+CONFIG_NFT_REJECT_IPV4=m
+CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_NF_TABLES_ARP=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -162,6 +181,7 @@ CONFIG_IP_NF_MATCH_RPFILTER=m
 CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_SYNPROXY=m
 CONFIG_IP_NF_TARGET_ULOG=m
 CONFIG_NF_NAT_IPV4=m
 CONFIG_IP_NF_TARGET_MASQUERADE=m
@@ -176,6 +196,9 @@ CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_NF_TABLES_IPV6=m
+CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -189,11 +212,13 @@ CONFIG_IP6_NF_MATCH_RT=m
 CONFIG_IP6_NF_TARGET_HL=m
 CONFIG_IP6_NF_FILTER=m
 CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_TARGET_SYNPROXY=m
 CONFIG_IP6_NF_MANGLE=m
 CONFIG_IP6_NF_RAW=m
 CONFIG_NF_NAT_IPV6=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
+CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_IP_DCCP=m
 # CONFIG_IP_DCCP_CCID3 is not set
 CONFIG_SCTP_COOKIE_HMAC_SHA1=y
@@ -204,9 +229,12 @@ CONFIG_ATALK=m
 CONFIG_DEV_APPLETALK=m
 CONFIG_IPDDP=m
 CONFIG_IPDDP_ENCAP=y
-CONFIG_IPDDP_DECAP=y
+CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
+CONFIG_BATMAN_ADV_NC=y
+CONFIG_NETLINK_DIAG=m
+CONFIG_NET_MPLS_GSO=m
 # CONFIG_WIRELESS is not set
 CONFIG_DEVTMPFS=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -229,6 +257,7 @@ CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
+CONFIG_DUMMY_IRQ=m
 CONFIG_IDE=y
 CONFIG_IDE_GD_ATAPI=y
 CONFIG_BLK_DEV_IDECD=y
@@ -289,10 +318,10 @@ CONFIG_MAC_EMUMOUSEBTN=y
 CONFIG_NETDEVICES=y
 CONFIG_DUMMY=m
 CONFIG_EQUALIZER=m
-CONFIG_MII=y
 CONFIG_NET_TEAM=m
 CONFIG_NET_TEAM_MODE_BROADCAST=m
 CONFIG_NET_TEAM_MODE_ROUNDROBIN=m
+CONFIG_NET_TEAM_MODE_RANDOM=m
 CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=m
 CONFIG_NET_TEAM_MODE_LOADBALANCE=m
 CONFIG_VXLAN=m
@@ -307,10 +336,10 @@ CONFIG_HPLANCE=y
 CONFIG_MVME147_NET=y
 CONFIG_SUN3LANCE=y
 CONFIG_MACMACE=y
+# CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
 CONFIG_MAC89x0=y
-# CONFIG_NET_VENDOR_FUJITSU is not set
 # CONFIG_NET_VENDOR_HP is not set
 CONFIG_BVME6000_NET=y
 CONFIG_MVME16x_NET=y
@@ -324,6 +353,7 @@ CONFIG_APNE=y
 CONFIG_ZORRO8390=y
 # CONFIG_NET_VENDOR_SEEQ is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
 CONFIG_PLIP=m
 CONFIG_PPP=m
@@ -356,7 +386,6 @@ CONFIG_INPUT_MISC=y
 CONFIG_INPUT_M68K_BEEP=m
 CONFIG_HP_SDC_RTC=m
 CONFIG_SERIO_Q40KBD=y
-CONFIG_VT_HW_CONSOLE_BINDING=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_PMACZILOG=y
@@ -499,11 +528,11 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_STRING_HELPERS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
-CONFIG_CRYPTO_NULL=m
 CONFIG_CRYPTO_CRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
@@ -536,6 +565,8 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 6df0acd9c2e5..02cdbac5565e 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -47,7 +47,6 @@ CONFIG_IP_PNP_RARP=y
 CONFIG_NET_IPIP=m
 CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
-CONFIG_SYN_COOKIES=y
 CONFIG_NET_IPVTI=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
@@ -58,11 +57,11 @@ CONFIG_INET_XFRM_MODE_BEET=m
 # CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=m
 CONFIG_INET_UDP_DIAG=m
-CONFIG_IPV6_PRIVACY=y
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
 CONFIG_NF_CONNTRACK=m
@@ -80,6 +79,17 @@ CONFIG_NF_CONNTRACK_PPTP=m
 CONFIG_NF_CONNTRACK_SANE=m
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_TABLES=m
+CONFIG_NFT_EXTHDR=m
+CONFIG_NFT_META=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_RBTREE=m
+CONFIG_NFT_HASH=m
+CONFIG_NFT_COUNTER=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_COMPAT=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -93,6 +103,7 @@ CONFIG_NETFILTER_XT_TARGET_NFLOG=m
 CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
 CONFIG_NETFILTER_XT_TARGET_NOTRACK=m
 CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
 CONFIG_NETFILTER_XT_TARGET_TRACE=m
 CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
 CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
@@ -125,6 +136,7 @@ CONFIG_NETFILTER_XT_MATCH_QUOTA=m
 CONFIG_NETFILTER_XT_MATCH_RATEEST=m
 CONFIG_NETFILTER_XT_MATCH_REALM=m
 CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
 CONFIG_NETFILTER_XT_MATCH_STATE=m
 CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
 CONFIG_NETFILTER_XT_MATCH_STRING=m
@@ -139,11 +151,18 @@ CONFIG_IP_SET_HASH_IP=m
 CONFIG_IP_SET_HASH_IPPORT=m
 CONFIG_IP_SET_HASH_IPPORTIP=m
 CONFIG_IP_SET_HASH_IPPORTNET=m
+CONFIG_IP_SET_HASH_NETPORTNET=m
 CONFIG_IP_SET_HASH_NET=m
+CONFIG_IP_SET_HASH_NETNET=m
 CONFIG_IP_SET_HASH_NETPORT=m
 CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
+CONFIG_NF_TABLES_IPV4=m
+CONFIG_NFT_REJECT_IPV4=m
+CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_NF_TABLES_ARP=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -151,6 +170,7 @@ CONFIG_IP_NF_MATCH_RPFILTER=m
 CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_SYNPROXY=m
 CONFIG_IP_NF_TARGET_ULOG=m
 CONFIG_NF_NAT_IPV4=m
 CONFIG_IP_NF_TARGET_MASQUERADE=m
@@ -165,6 +185,9 @@ CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_NF_TABLES_IPV6=m
+CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -178,11 +201,13 @@ CONFIG_IP6_NF_MATCH_RT=m
 CONFIG_IP6_NF_TARGET_HL=m
 CONFIG_IP6_NF_FILTER=m
 CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_TARGET_SYNPROXY=m
 CONFIG_IP6_NF_MANGLE=m
 CONFIG_IP6_NF_RAW=m
 CONFIG_NF_NAT_IPV6=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
+CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_IP_DCCP=m
 # CONFIG_IP_DCCP_CCID3 is not set
 CONFIG_SCTP_COOKIE_HMAC_SHA1=y
@@ -190,8 +215,12 @@ CONFIG_RDS=m
 CONFIG_RDS_TCP=m
 CONFIG_L2TP=m
 CONFIG_ATALK=m
+CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
+CONFIG_BATMAN_ADV_NC=y
+CONFIG_NETLINK_DIAG=m
+CONFIG_NET_MPLS_GSO=m
 # CONFIG_WIRELESS is not set
 CONFIG_DEVTMPFS=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -204,6 +233,7 @@ CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
+CONFIG_DUMMY_IRQ=m
 CONFIG_RAID_ATTRS=m
 CONFIG_SCSI=y
 CONFIG_SCSI_TGT=m
@@ -241,6 +271,7 @@ CONFIG_EQUALIZER=m
 CONFIG_NET_TEAM=m
 CONFIG_NET_TEAM_MODE_BROADCAST=m
 CONFIG_NET_TEAM_MODE_ROUNDROBIN=m
+CONFIG_NET_TEAM_MODE_RANDOM=m
 CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=m
 CONFIG_NET_TEAM_MODE_LOADBALANCE=m
 CONFIG_VXLAN=m
@@ -248,6 +279,7 @@ CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
 CONFIG_MVME147_NET=y
+# CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
 # CONFIG_NET_VENDOR_INTEL is not set
@@ -256,6 +288,7 @@ CONFIG_MVME147_NET=y
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
@@ -388,10 +421,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_STRING_HELPERS=m
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
-CONFIG_CRYPTO_NULL=m
 CONFIG_CRYPTO_CRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
@@ -424,6 +457,8 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 9cb7e4da8e84..05a990a9dbd4 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -48,7 +48,6 @@ CONFIG_IP_PNP_RARP=y
 CONFIG_NET_IPIP=m
 CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
-CONFIG_SYN_COOKIES=y
 CONFIG_NET_IPVTI=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
@@ -59,11 +58,11 @@ CONFIG_INET_XFRM_MODE_BEET=m
 # CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=m
 CONFIG_INET_UDP_DIAG=m
-CONFIG_IPV6_PRIVACY=y
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
 CONFIG_NF_CONNTRACK=m
@@ -81,6 +80,17 @@ CONFIG_NF_CONNTRACK_PPTP=m
 CONFIG_NF_CONNTRACK_SANE=m
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_TABLES=m
+CONFIG_NFT_EXTHDR=m
+CONFIG_NFT_META=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_RBTREE=m
+CONFIG_NFT_HASH=m
+CONFIG_NFT_COUNTER=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_COMPAT=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -94,6 +104,7 @@ CONFIG_NETFILTER_XT_TARGET_NFLOG=m
 CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
 CONFIG_NETFILTER_XT_TARGET_NOTRACK=m
 CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
 CONFIG_NETFILTER_XT_TARGET_TRACE=m
 CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
 CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
@@ -126,6 +137,7 @@ CONFIG_NETFILTER_XT_MATCH_QUOTA=m
 CONFIG_NETFILTER_XT_MATCH_RATEEST=m
 CONFIG_NETFILTER_XT_MATCH_REALM=m
 CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
 CONFIG_NETFILTER_XT_MATCH_STATE=m
 CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
 CONFIG_NETFILTER_XT_MATCH_STRING=m
@@ -140,11 +152,18 @@ CONFIG_IP_SET_HASH_IP=m
 CONFIG_IP_SET_HASH_IPPORT=m
 CONFIG_IP_SET_HASH_IPPORTIP=m
 CONFIG_IP_SET_HASH_IPPORTNET=m
+CONFIG_IP_SET_HASH_NETPORTNET=m
 CONFIG_IP_SET_HASH_NET=m
+CONFIG_IP_SET_HASH_NETNET=m
 CONFIG_IP_SET_HASH_NETPORT=m
 CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
+CONFIG_NF_TABLES_IPV4=m
+CONFIG_NFT_REJECT_IPV4=m
+CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_NF_TABLES_ARP=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -152,6 +171,7 @@ CONFIG_IP_NF_MATCH_RPFILTER=m
 CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_SYNPROXY=m
 CONFIG_IP_NF_TARGET_ULOG=m
 CONFIG_NF_NAT_IPV4=m
 CONFIG_IP_NF_TARGET_MASQUERADE=m
@@ -166,6 +186,9 @@ CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_NF_TABLES_IPV6=m
+CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -179,11 +202,13 @@ CONFIG_IP6_NF_MATCH_RT=m
 CONFIG_IP6_NF_TARGET_HL=m
 CONFIG_IP6_NF_FILTER=m
 CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_TARGET_SYNPROXY=m
 CONFIG_IP6_NF_MANGLE=m
 CONFIG_IP6_NF_RAW=m
 CONFIG_NF_NAT_IPV6=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
+CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_IP_DCCP=m
 # CONFIG_IP_DCCP_CCID3 is not set
 CONFIG_SCTP_COOKIE_HMAC_SHA1=y
@@ -191,8 +216,12 @@ CONFIG_RDS=m
 CONFIG_RDS_TCP=m
 CONFIG_L2TP=m
 CONFIG_ATALK=m
+CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
+CONFIG_BATMAN_ADV_NC=y
+CONFIG_NETLINK_DIAG=m
+CONFIG_NET_MPLS_GSO=m
 # CONFIG_WIRELESS is not set
 CONFIG_DEVTMPFS=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -205,6 +234,7 @@ CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
+CONFIG_DUMMY_IRQ=m
 CONFIG_RAID_ATTRS=m
 CONFIG_SCSI=y
 CONFIG_SCSI_TGT=m
@@ -242,12 +272,14 @@ CONFIG_EQUALIZER=m
 CONFIG_NET_TEAM=m
 CONFIG_NET_TEAM_MODE_BROADCAST=m
 CONFIG_NET_TEAM_MODE_ROUNDROBIN=m
+CONFIG_NET_TEAM_MODE_RANDOM=m
 CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=m
 CONFIG_NET_TEAM_MODE_LOADBALANCE=m
 CONFIG_VXLAN=m
 CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
+# CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
 CONFIG_MVME16x_NET=y
@@ -256,6 +288,7 @@ CONFIG_MVME16x_NET=y
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
@@ -388,11 +421,11 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_STRING_HELPERS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
-CONFIG_CRYPTO_NULL=m
 CONFIG_CRYPTO_CRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
@@ -425,6 +458,8 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index f7bdca099892..568e2a98f976 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -48,7 +48,6 @@ CONFIG_IP_PNP_RARP=y
 CONFIG_NET_IPIP=m
 CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
-CONFIG_SYN_COOKIES=y
 CONFIG_NET_IPVTI=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
@@ -59,11 +58,11 @@ CONFIG_INET_XFRM_MODE_BEET=m
 # CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=m
 CONFIG_INET_UDP_DIAG=m
-CONFIG_IPV6_PRIVACY=y
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
 CONFIG_NF_CONNTRACK=m
@@ -81,6 +80,17 @@ CONFIG_NF_CONNTRACK_PPTP=m
 CONFIG_NF_CONNTRACK_SANE=m
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_TABLES=m
+CONFIG_NFT_EXTHDR=m
+CONFIG_NFT_META=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_RBTREE=m
+CONFIG_NFT_HASH=m
+CONFIG_NFT_COUNTER=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_COMPAT=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -94,6 +104,7 @@ CONFIG_NETFILTER_XT_TARGET_NFLOG=m
 CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
 CONFIG_NETFILTER_XT_TARGET_NOTRACK=m
 CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
 CONFIG_NETFILTER_XT_TARGET_TRACE=m
 CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
 CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
@@ -126,6 +137,7 @@ CONFIG_NETFILTER_XT_MATCH_QUOTA=m
 CONFIG_NETFILTER_XT_MATCH_RATEEST=m
 CONFIG_NETFILTER_XT_MATCH_REALM=m
 CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
 CONFIG_NETFILTER_XT_MATCH_STATE=m
 CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
 CONFIG_NETFILTER_XT_MATCH_STRING=m
@@ -140,11 +152,18 @@ CONFIG_IP_SET_HASH_IP=m
 CONFIG_IP_SET_HASH_IPPORT=m
 CONFIG_IP_SET_HASH_IPPORTIP=m
 CONFIG_IP_SET_HASH_IPPORTNET=m
+CONFIG_IP_SET_HASH_NETPORTNET=m
 CONFIG_IP_SET_HASH_NET=m
+CONFIG_IP_SET_HASH_NETNET=m
 CONFIG_IP_SET_HASH_NETPORT=m
 CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
+CONFIG_NF_TABLES_IPV4=m
+CONFIG_NFT_REJECT_IPV4=m
+CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_NF_TABLES_ARP=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -152,6 +171,7 @@ CONFIG_IP_NF_MATCH_RPFILTER=m
 CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_SYNPROXY=m
 CONFIG_IP_NF_TARGET_ULOG=m
 CONFIG_NF_NAT_IPV4=m
 CONFIG_IP_NF_TARGET_MASQUERADE=m
@@ -166,6 +186,9 @@ CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_NF_TABLES_IPV6=m
+CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -179,11 +202,13 @@ CONFIG_IP6_NF_MATCH_RT=m
 CONFIG_IP6_NF_TARGET_HL=m
 CONFIG_IP6_NF_FILTER=m
 CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_TARGET_SYNPROXY=m
 CONFIG_IP6_NF_MANGLE=m
 CONFIG_IP6_NF_RAW=m
 CONFIG_NF_NAT_IPV6=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
+CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_IP_DCCP=m
 # CONFIG_IP_DCCP_CCID3 is not set
 CONFIG_SCTP_COOKIE_HMAC_SHA1=y
@@ -191,8 +216,12 @@ CONFIG_RDS=m
 CONFIG_RDS_TCP=m
 CONFIG_L2TP=m
 CONFIG_ATALK=m
+CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
+CONFIG_BATMAN_ADV_NC=y
+CONFIG_NETLINK_DIAG=m
+CONFIG_NET_MPLS_GSO=m
 # CONFIG_WIRELESS is not set
 CONFIG_DEVTMPFS=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -208,6 +237,7 @@ CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
+CONFIG_DUMMY_IRQ=m
 CONFIG_IDE=y
 CONFIG_IDE_GD_ATAPI=y
 CONFIG_BLK_DEV_IDECD=y
@@ -248,6 +278,7 @@ CONFIG_EQUALIZER=m
 CONFIG_NET_TEAM=m
 CONFIG_NET_TEAM_MODE_BROADCAST=m
 CONFIG_NET_TEAM_MODE_ROUNDROBIN=m
+CONFIG_NET_TEAM_MODE_RANDOM=m
 CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=m
 CONFIG_NET_TEAM_MODE_LOADBALANCE=m
 CONFIG_VXLAN=m
@@ -256,10 +287,10 @@ CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
 # CONFIG_NET_VENDOR_3COM is not set
 # CONFIG_NET_VENDOR_AMD is not set
+# CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
 # CONFIG_NET_VENDOR_CIRRUS is not set
-# CONFIG_NET_VENDOR_FUJITSU is not set
 # CONFIG_NET_VENDOR_HP is not set
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
@@ -268,6 +299,7 @@ CONFIG_NE2000=m
 # CONFIG_NET_VENDOR_SEEQ is not set
 # CONFIG_NET_VENDOR_SMSC is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
 CONFIG_PLIP=m
 CONFIG_PPP=m
@@ -292,7 +324,6 @@ CONFIG_MOUSE_SERIAL=m
 CONFIG_INPUT_MISC=y
 CONFIG_INPUT_M68K_BEEP=m
 CONFIG_SERIO_Q40KBD=y
-CONFIG_VT_HW_CONSOLE_BINDING=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_DEVKMEM is not set
 CONFIG_PRINTER=m
@@ -412,10 +443,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_STRING_HELPERS=m
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
-CONFIG_CRYPTO_NULL=m
 CONFIG_CRYPTO_CRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
@@ -448,6 +479,8 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index e578dd87a451..60b0aeac5742 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -45,7 +45,6 @@ CONFIG_IP_PNP_RARP=y
 CONFIG_NET_IPIP=m
 CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
-CONFIG_SYN_COOKIES=y
 CONFIG_NET_IPVTI=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
@@ -56,11 +55,11 @@ CONFIG_INET_XFRM_MODE_BEET=m
 # CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=m
 CONFIG_INET_UDP_DIAG=m
-CONFIG_IPV6_PRIVACY=y
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
 CONFIG_NF_CONNTRACK=m
@@ -78,6 +77,17 @@ CONFIG_NF_CONNTRACK_PPTP=m
 CONFIG_NF_CONNTRACK_SANE=m
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_TABLES=m
+CONFIG_NFT_EXTHDR=m
+CONFIG_NFT_META=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_RBTREE=m
+CONFIG_NFT_HASH=m
+CONFIG_NFT_COUNTER=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_COMPAT=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -91,6 +101,7 @@ CONFIG_NETFILTER_XT_TARGET_NFLOG=m
 CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
 CONFIG_NETFILTER_XT_TARGET_NOTRACK=m
 CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
 CONFIG_NETFILTER_XT_TARGET_TRACE=m
 CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
 CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
@@ -123,6 +134,7 @@ CONFIG_NETFILTER_XT_MATCH_QUOTA=m
 CONFIG_NETFILTER_XT_MATCH_RATEEST=m
 CONFIG_NETFILTER_XT_MATCH_REALM=m
 CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
 CONFIG_NETFILTER_XT_MATCH_STATE=m
 CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
 CONFIG_NETFILTER_XT_MATCH_STRING=m
@@ -137,11 +149,18 @@ CONFIG_IP_SET_HASH_IP=m
 CONFIG_IP_SET_HASH_IPPORT=m
 CONFIG_IP_SET_HASH_IPPORTIP=m
 CONFIG_IP_SET_HASH_IPPORTNET=m
+CONFIG_IP_SET_HASH_NETPORTNET=m
 CONFIG_IP_SET_HASH_NET=m
+CONFIG_IP_SET_HASH_NETNET=m
 CONFIG_IP_SET_HASH_NETPORT=m
 CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
+CONFIG_NF_TABLES_IPV4=m
+CONFIG_NFT_REJECT_IPV4=m
+CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_NF_TABLES_ARP=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -149,6 +168,7 @@ CONFIG_IP_NF_MATCH_RPFILTER=m
 CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_SYNPROXY=m
 CONFIG_IP_NF_TARGET_ULOG=m
 CONFIG_NF_NAT_IPV4=m
 CONFIG_IP_NF_TARGET_MASQUERADE=m
@@ -163,6 +183,9 @@ CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_NF_TABLES_IPV6=m
+CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -176,11 +199,13 @@ CONFIG_IP6_NF_MATCH_RT=m
 CONFIG_IP6_NF_TARGET_HL=m
 CONFIG_IP6_NF_FILTER=m
 CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_TARGET_SYNPROXY=m
 CONFIG_IP6_NF_MANGLE=m
 CONFIG_IP6_NF_RAW=m
 CONFIG_NF_NAT_IPV6=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
+CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_IP_DCCP=m
 # CONFIG_IP_DCCP_CCID3 is not set
 CONFIG_SCTP_COOKIE_HMAC_SHA1=y
@@ -188,8 +213,12 @@ CONFIG_RDS=m
 CONFIG_RDS_TCP=m
 CONFIG_L2TP=m
 CONFIG_ATALK=m
+CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
+CONFIG_BATMAN_ADV_NC=y
+CONFIG_NETLINK_DIAG=m
+CONFIG_NET_MPLS_GSO=m
 # CONFIG_WIRELESS is not set
 CONFIG_DEVTMPFS=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -202,6 +231,7 @@ CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
+CONFIG_DUMMY_IRQ=m
 CONFIG_RAID_ATTRS=m
 CONFIG_SCSI=y
 CONFIG_SCSI_TGT=m
@@ -239,6 +269,7 @@ CONFIG_EQUALIZER=m
 CONFIG_NET_TEAM=m
 CONFIG_NET_TEAM_MODE_BROADCAST=m
 CONFIG_NET_TEAM_MODE_ROUNDROBIN=m
+CONFIG_NET_TEAM_MODE_RANDOM=m
 CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=m
 CONFIG_NET_TEAM_MODE_LOADBALANCE=m
 CONFIG_VXLAN=m
@@ -246,6 +277,7 @@ CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
 CONFIG_SUN3LANCE=y
+# CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
 CONFIG_SUN3_82586=y
 # CONFIG_NET_VENDOR_MARVELL is not set
@@ -254,6 +286,7 @@ CONFIG_SUN3_82586=y
 # CONFIG_NET_VENDOR_SEEQ is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
 # CONFIG_NET_VENDOR_SUN is not set
+# CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
@@ -275,7 +308,6 @@ CONFIG_INPUT_EVDEV=m
 CONFIG_KEYBOARD_SUNKBD=y
 # CONFIG_MOUSE_PS2 is not set
 CONFIG_MOUSE_SERIAL=m
-CONFIG_VT_HW_CONSOLE_BINDING=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_DEVKMEM is not set
 # CONFIG_HW_RANDOM is not set
@@ -390,10 +422,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_STRING_HELPERS=m
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
-CONFIG_CRYPTO_NULL=m
 CONFIG_CRYPTO_CRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
@@ -426,6 +458,8 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index f5c91e17b3d7..21bda331eebb 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -45,7 +45,6 @@ CONFIG_IP_PNP_RARP=y
 CONFIG_NET_IPIP=m
 CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
-CONFIG_SYN_COOKIES=y
 CONFIG_NET_IPVTI=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
@@ -56,11 +55,11 @@ CONFIG_INET_XFRM_MODE_BEET=m
 # CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=m
 CONFIG_INET_UDP_DIAG=m
-CONFIG_IPV6_PRIVACY=y
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
 CONFIG_NF_CONNTRACK=m
@@ -78,6 +77,17 @@ CONFIG_NF_CONNTRACK_PPTP=m
 CONFIG_NF_CONNTRACK_SANE=m
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_TABLES=m
+CONFIG_NFT_EXTHDR=m
+CONFIG_NFT_META=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_RBTREE=m
+CONFIG_NFT_HASH=m
+CONFIG_NFT_COUNTER=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_COMPAT=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -91,6 +101,7 @@ CONFIG_NETFILTER_XT_TARGET_NFLOG=m
 CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
 CONFIG_NETFILTER_XT_TARGET_NOTRACK=m
 CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
 CONFIG_NETFILTER_XT_TARGET_TRACE=m
 CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
 CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
@@ -123,6 +134,7 @@ CONFIG_NETFILTER_XT_MATCH_QUOTA=m
 CONFIG_NETFILTER_XT_MATCH_RATEEST=m
 CONFIG_NETFILTER_XT_MATCH_REALM=m
 CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
 CONFIG_NETFILTER_XT_MATCH_STATE=m
 CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
 CONFIG_NETFILTER_XT_MATCH_STRING=m
@@ -137,11 +149,18 @@ CONFIG_IP_SET_HASH_IP=m
 CONFIG_IP_SET_HASH_IPPORT=m
 CONFIG_IP_SET_HASH_IPPORTIP=m
 CONFIG_IP_SET_HASH_IPPORTNET=m
+CONFIG_IP_SET_HASH_NETPORTNET=m
 CONFIG_IP_SET_HASH_NET=m
+CONFIG_IP_SET_HASH_NETNET=m
 CONFIG_IP_SET_HASH_NETPORT=m
 CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
+CONFIG_NF_TABLES_IPV4=m
+CONFIG_NFT_REJECT_IPV4=m
+CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_NF_TABLES_ARP=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -149,6 +168,7 @@ CONFIG_IP_NF_MATCH_RPFILTER=m
 CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_SYNPROXY=m
 CONFIG_IP_NF_TARGET_ULOG=m
 CONFIG_NF_NAT_IPV4=m
 CONFIG_IP_NF_TARGET_MASQUERADE=m
@@ -163,6 +183,9 @@ CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_NF_TABLES_IPV6=m
+CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -176,11 +199,13 @@ CONFIG_IP6_NF_MATCH_RT=m
 CONFIG_IP6_NF_TARGET_HL=m
 CONFIG_IP6_NF_FILTER=m
 CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_TARGET_SYNPROXY=m
 CONFIG_IP6_NF_MANGLE=m
 CONFIG_IP6_NF_RAW=m
 CONFIG_NF_NAT_IPV6=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
+CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_IP_DCCP=m
 # CONFIG_IP_DCCP_CCID3 is not set
 CONFIG_SCTP_COOKIE_HMAC_SHA1=y
@@ -188,8 +213,12 @@ CONFIG_RDS=m
 CONFIG_RDS_TCP=m
 CONFIG_L2TP=m
 CONFIG_ATALK=m
+CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
+CONFIG_BATMAN_ADV_NC=y
+CONFIG_NETLINK_DIAG=m
+CONFIG_NET_MPLS_GSO=m
 # CONFIG_WIRELESS is not set
 CONFIG_DEVTMPFS=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -202,6 +231,7 @@ CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
+CONFIG_DUMMY_IRQ=m
 CONFIG_RAID_ATTRS=m
 CONFIG_SCSI=y
 CONFIG_SCSI_TGT=m
@@ -239,6 +269,7 @@ CONFIG_EQUALIZER=m
 CONFIG_NET_TEAM=m
 CONFIG_NET_TEAM_MODE_BROADCAST=m
 CONFIG_NET_TEAM_MODE_ROUNDROBIN=m
+CONFIG_NET_TEAM_MODE_RANDOM=m
 CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=m
 CONFIG_NET_TEAM_MODE_LOADBALANCE=m
 CONFIG_VXLAN=m
@@ -246,6 +277,7 @@ CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
 CONFIG_SUN3LANCE=y
+# CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
 # CONFIG_NET_VENDOR_INTEL is not set
@@ -254,6 +286,7 @@ CONFIG_SUN3LANCE=y
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
@@ -275,7 +308,6 @@ CONFIG_INPUT_EVDEV=m
 CONFIG_KEYBOARD_SUNKBD=y
 # CONFIG_MOUSE_PS2 is not set
 CONFIG_MOUSE_SERIAL=m
-CONFIG_VT_HW_CONSOLE_BINDING=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_DEVKMEM is not set
 # CONFIG_HW_RANDOM is not set
@@ -390,10 +422,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_STRING_HELPERS=m
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
-CONFIG_CRYPTO_NULL=m
 CONFIG_CRYPTO_CRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
@@ -426,6 +458,8 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
-- 
cgit v1.2.3


From e66d2ae7c67bd9ac982a3d1890564de7f7eabf4b Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Sun, 29 Dec 2013 02:29:30 +0100
Subject: KVM: x86: Fix APIC map calculation after re-enabling

Update arch.apic_base before triggering recalculate_apic_map. Otherwise
the recalculation will work against the previous state of the APIC and
will fail to build the correct map when an APIC is hardware-enabled
again.

This fixes a regression of 1e08ec4a13.

Cc: stable@vger.kernel.org
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/lapic.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index dec48bfaddb8..1673940cf9c3 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1350,6 +1350,10 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 		return;
 	}
 
+	if (!kvm_vcpu_is_bsp(apic->vcpu))
+		value &= ~MSR_IA32_APICBASE_BSP;
+	vcpu->arch.apic_base = value;
+
 	/* update jump label if enable bit changes */
 	if ((vcpu->arch.apic_base ^ value) & MSR_IA32_APICBASE_ENABLE) {
 		if (value & MSR_IA32_APICBASE_ENABLE)
@@ -1359,10 +1363,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 		recalculate_apic_map(vcpu->kvm);
 	}
 
-	if (!kvm_vcpu_is_bsp(apic->vcpu))
-		value &= ~MSR_IA32_APICBASE_BSP;
-
-	vcpu->arch.apic_base = value;
 	if ((old_value ^ value) & X2APIC_ENABLE) {
 		if (value & X2APIC_ENABLE) {
 			u32 id = kvm_apic_id(apic);
-- 
cgit v1.2.3


From 29bf08f12b2fd72b882da0d85b7385e4a438a297 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@web.de>
Date: Sat, 28 Dec 2013 16:31:52 +0100
Subject: KVM: nVMX: Unconditionally uninit the MMU on nested vmexit

Three reasons for doing this: 1. arch.walk_mmu points to arch.mmu anyway
in case nested EPT wasn't in use. 2. this aligns VMX with SVM. But 3. is
most important: nested_cpu_has_ept(vmcs12) queries the VMCS page, and if
one guest VCPU manipulates the page of another VCPU in L2, we may be
fooled to skip over the nested_ept_uninit_mmu_context, leaving mmu in
nested state. That can crash the host later on if nested_ept_get_cr3 is
invoked while L1 already left vmxon and nested.current_vmcs12 became
NULL therefore.

Cc: stable@kernel.org
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b2fe1c252f35..da7837e1349d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8283,8 +8283,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 	vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
 	kvm_set_cr4(vcpu, vmcs12->host_cr4);
 
-	if (nested_cpu_has_ept(vmcs12))
-		nested_ept_uninit_mmu_context(vcpu);
+	nested_ept_uninit_mmu_context(vcpu);
 
 	kvm_set_cr3(vcpu, vmcs12->host_cr3);
 	kvm_mmu_reset_context(vcpu);
-- 
cgit v1.2.3


From ad70b029d2c678386384bd72c7fa2705c449b518 Mon Sep 17 00:00:00 2001
From: Nobuhiro Iwamatsu <nobuhiro.iwamatsu.yj@renesas.com>
Date: Thu, 2 Jan 2014 12:58:53 -0800
Subject: sh: add EXPORT_SYMBOL(min_low_pfn) and EXPORT_SYMBOL(max_low_pfn) to
 sh_ksyms_32.c

Min_low_pfn and max_low_pfn were used in pfn_valid macro if defined
CONFIG_FLATMEM.  When the functions that use the pfn_valid is used in
driver module, max_low_pfn and min_low_pfn is to undefined, and fail to
build.

  ERROR: "min_low_pfn" [drivers/block/aoe/aoe.ko] undefined!
  ERROR: "max_low_pfn" [drivers/block/aoe/aoe.ko] undefined!
  make[2]: *** [__modpost] Error 1
  make[1]: *** [modules] Error 2

This patch fix this problem.

Signed-off-by: Nobuhiro Iwamatsu <nobuhiro.iwamatsu.yj@renesas.com>
Cc: Kuninori Morimoto <kuninori.morimoto.gx@gmail.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/sh/kernel/sh_ksyms_32.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch')

diff --git a/arch/sh/kernel/sh_ksyms_32.c b/arch/sh/kernel/sh_ksyms_32.c
index 2a0a596ebf67..d77f2f6c7ff0 100644
--- a/arch/sh/kernel/sh_ksyms_32.c
+++ b/arch/sh/kernel/sh_ksyms_32.c
@@ -20,6 +20,11 @@ EXPORT_SYMBOL(csum_partial_copy_generic);
 EXPORT_SYMBOL(copy_page);
 EXPORT_SYMBOL(__clear_user);
 EXPORT_SYMBOL(empty_zero_page);
+#ifdef CONFIG_FLATMEM
+/* need in pfn_valid macro */
+EXPORT_SYMBOL(min_low_pfn);
+EXPORT_SYMBOL(max_low_pfn);
+#endif
 
 #define DECLARE_EXPORT(name)		\
 	extern void name(void);EXPORT_SYMBOL(name)
-- 
cgit v1.2.3


From 41a34cec2e0df7798ea322ed1480bc3d3facdc8e Mon Sep 17 00:00:00 2001
From: Dave Young <dyoung@redhat.com>
Date: Fri, 3 Jan 2014 11:54:31 +0800
Subject: x86: ksysfs.c build fix

kbuild test robot report below error for randconfig:

  arch/x86/kernel/ksysfs.c: In function 'get_setup_data_paddr':
  arch/x86/kernel/ksysfs.c:81:3: error: implicit declaration of function 'ioremap_cache' [-Werror=implicit-function-declaration]
  arch/x86/kernel/ksysfs.c:86:3: error: implicit declaration of function 'iounmap' [-Werror=implicit-function-declaration]

Fix it by including <asm/io.h> in ksysfs.c

Signed-off-by: Dave Young <dyoung@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/kernel/ksysfs.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
index eb53d153f307..c2bedaea11f7 100644
--- a/arch/x86/kernel/ksysfs.c
+++ b/arch/x86/kernel/ksysfs.c
@@ -17,6 +17,7 @@
 #include <linux/slab.h>
 #include <linux/mm.h>
 
+#include <asm/io.h>
 #include <asm/setup.h>
 
 static ssize_t version_show(struct kobject *kobj,
-- 
cgit v1.2.3


From 5c12af0c41e3417e1939095325920463b5f8e726 Mon Sep 17 00:00:00 2001
From: Dave Young <dyoung@redhat.com>
Date: Fri, 3 Jan 2014 11:56:49 +0800
Subject: x86/efi: parse_efi_setup() build fix

In case without CONFIG_EFI, there will be below build error:

   arch/x86/built-in.o: In function `setup_arch':
  (.init.text+0x9dc): undefined reference to `parse_efi_setup'

Thus fix it by adding blank inline function in asm/efi.h
Also remove an unused declaration for variable efi_data_len.

Signed-off-by: Dave Young <dyoung@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/include/asm/efi.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 4d1ba80b6ff1..3b978c472d08 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -142,8 +142,6 @@ struct efi_setup_data {
 };
 
 extern u64 efi_setup;
-extern u32 efi_data_len;
-extern void parse_efi_setup(u64 phys_addr, u32 data_len);
 
 #ifdef CONFIG_EFI
 
@@ -153,7 +151,7 @@ static inline bool efi_is_native(void)
 }
 
 extern struct console early_efi_console;
-
+extern void parse_efi_setup(u64 phys_addr, u32 data_len);
 #else
 /*
  * IF EFI is not configured, have the EFI calls return -ENOSYS.
@@ -165,6 +163,7 @@ extern struct console early_efi_console;
 #define efi_call4(_f, _a1, _a2, _a3, _a4)		(-ENOSYS)
 #define efi_call5(_f, _a1, _a2, _a3, _a4, _a5)		(-ENOSYS)
 #define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6)	(-ENOSYS)
+static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {}
 #endif /* CONFIG_EFI */
 
 #endif /* _ASM_X86_EFI_H */
-- 
cgit v1.2.3


From dd360393f4d948eb518372316e52101cf3b44212 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Mon, 23 Dec 2013 14:16:58 +0200
Subject: x86, cpu: Detect more TLB configuration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Intel Software Developer’s Manual covers few more TLB
configurations exposed as CPUID 2 descriptors:

61H Instruction TLB: 4 KByte pages, fully associative, 48 entries
63H Data TLB: 1 GByte pages, 4-way set associative, 4 entries
76H Instruction TLB: 2M/4M pages, fully associative, 8 entries
B5H Instruction TLB: 4KByte pages, 8-way set associative, 64 entries
B6H Instruction TLB: 4KByte pages, 8-way set associative, 128 entries
C1H Shared 2nd-Level TLB: 4 KByte/2MByte pages, 8-way associative, 1024 entries
C2H DTLB DTLB: 2 MByte/$MByte pages, 4-way associative, 16 entries

Let's detect them as well.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Link: http://lkml.kernel.org/r/1387801018-14499-1-git-send-email-kirill.shutemov@linux.intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/processor.h |  1 +
 arch/x86/kernel/cpu/common.c     |  7 ++++---
 arch/x86/kernel/cpu/intel.c      | 26 ++++++++++++++++++++++++++
 3 files changed, 31 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 7b034a4057f9..1dd6260ed940 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -72,6 +72,7 @@ extern u16 __read_mostly tlb_lli_4m[NR_INFO];
 extern u16 __read_mostly tlb_lld_4k[NR_INFO];
 extern u16 __read_mostly tlb_lld_2m[NR_INFO];
 extern u16 __read_mostly tlb_lld_4m[NR_INFO];
+extern u16 __read_mostly tlb_lld_1g[NR_INFO];
 extern s8  __read_mostly tlb_flushall_shift;
 
 /*
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 6abc172b8258..24b6fd10625a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -472,6 +472,7 @@ u16 __read_mostly tlb_lli_4m[NR_INFO];
 u16 __read_mostly tlb_lld_4k[NR_INFO];
 u16 __read_mostly tlb_lld_2m[NR_INFO];
 u16 __read_mostly tlb_lld_4m[NR_INFO];
+u16 __read_mostly tlb_lld_1g[NR_INFO];
 
 /*
  * tlb_flushall_shift shows the balance point in replacing cr3 write
@@ -486,13 +487,13 @@ void cpu_detect_tlb(struct cpuinfo_x86 *c)
 	if (this_cpu->c_detect_tlb)
 		this_cpu->c_detect_tlb(c);
 
-	printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \
-		"Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n"	     \
+	printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n"
+		"Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n"
 		"tlb_flushall_shift: %d\n",
 		tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES],
 		tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES],
 		tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES],
-		tlb_flushall_shift);
+		tlb_lld_1g[ENTRIES], tlb_flushall_shift);
 }
 
 void detect_ht(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index ea04b342c026..5eb7ea5fae15 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -506,6 +506,7 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
 #define TLB_DATA0_2M_4M	0x23
 
 #define STLB_4K		0x41
+#define STLB_4K_2M	0x42
 
 static const struct _tlb_table intel_tlb_table[] = {
 	{ 0x01, TLB_INST_4K,		32,	" TLB_INST 4 KByte pages, 4-way set associative" },
@@ -526,13 +527,20 @@ static const struct _tlb_table intel_tlb_table[] = {
 	{ 0x5b, TLB_DATA_4K_4M,		64,	" TLB_DATA 4 KByte and 4 MByte pages" },
 	{ 0x5c, TLB_DATA_4K_4M,		128,	" TLB_DATA 4 KByte and 4 MByte pages" },
 	{ 0x5d, TLB_DATA_4K_4M,		256,	" TLB_DATA 4 KByte and 4 MByte pages" },
+	{ 0x61, TLB_INST_4K,		48,	" TLB_INST 4 KByte pages, full associative" },
+	{ 0x63, TLB_DATA_1G,		4,	" TLB_DATA 1 GByte pages, 4-way set associative" },
+	{ 0x76, TLB_INST_2M_4M,		8,	" TLB_INST 2-MByte or 4-MByte pages, fully associative" },
 	{ 0xb0, TLB_INST_4K,		128,	" TLB_INST 4 KByte pages, 4-way set associative" },
 	{ 0xb1, TLB_INST_2M_4M,		4,	" TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries" },
 	{ 0xb2, TLB_INST_4K,		64,	" TLB_INST 4KByte pages, 4-way set associative" },
 	{ 0xb3, TLB_DATA_4K,		128,	" TLB_DATA 4 KByte pages, 4-way set associative" },
 	{ 0xb4, TLB_DATA_4K,		256,	" TLB_DATA 4 KByte pages, 4-way associative" },
+	{ 0xb5, TLB_INST_4K,		64,	" TLB_INST 4 KByte pages, 8-way set ssociative" },
+	{ 0xb6, TLB_INST_4K,		128,	" TLB_INST 4 KByte pages, 8-way set ssociative" },
 	{ 0xba, TLB_DATA_4K,		64,	" TLB_DATA 4 KByte pages, 4-way associative" },
 	{ 0xc0, TLB_DATA_4K_4M,		8,	" TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" },
+	{ 0xc1, STLB_4K_2M,		1024,	" STLB 4 KByte and 2 MByte pages, 8-way associative" },
+	{ 0xc2, TLB_DATA_2M_4M,		16,	" DTLB 2 MByte/4MByte pages, 4-way associative" },
 	{ 0xca, STLB_4K,		512,	" STLB 4 KByte pages, 4-way associative" },
 	{ 0x00, 0, 0 }
 };
@@ -558,6 +566,20 @@ static void intel_tlb_lookup(const unsigned char desc)
 		if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
 			tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
 		break;
+	case STLB_4K_2M:
+		if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
+		if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
+		if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
+		if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries;
+		if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
+		if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
+		break;
 	case TLB_INST_ALL:
 		if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
 			tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
@@ -603,6 +625,10 @@ static void intel_tlb_lookup(const unsigned char desc)
 		if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
 			tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
 		break;
+	case TLB_DATA_1G:
+		if (tlb_lld_1g[ENTRIES] < intel_tlb_table[k].entries)
+			tlb_lld_1g[ENTRIES] = intel_tlb_table[k].entries;
+		break;
 	}
 }
 
-- 
cgit v1.2.3


From bf70053c5d2000514ade1f60f47e1f426899af39 Mon Sep 17 00:00:00 2001
From: Yijing Wang <wangyijing@huawei.com>
Date: Wed, 11 Dec 2013 14:00:26 +0800
Subject: sparc/PCI: Use dev_is_pci() to identify PCI devices

Use dev_is_pci() instead of checking bus type directly.

Signed-off-by: Yijing Wang <wangyijing@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/iommu.c  | 2 +-
 arch/sparc/kernel/ioport.c | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
index 070ed141aac7..76663b019eb5 100644
--- a/arch/sparc/kernel/iommu.c
+++ b/arch/sparc/kernel/iommu.c
@@ -854,7 +854,7 @@ int dma_supported(struct device *dev, u64 device_mask)
 		return 1;
 
 #ifdef CONFIG_PCI
-	if (dev->bus == &pci_bus_type)
+	if (dev_is_pci(dev))
 		return pci64_dma_supported(to_pci_dev(dev), device_mask);
 #endif
 
diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c
index 2096468de9b2..e7e215dfa866 100644
--- a/arch/sparc/kernel/ioport.c
+++ b/arch/sparc/kernel/ioport.c
@@ -666,10 +666,9 @@ EXPORT_SYMBOL(dma_ops);
  */
 int dma_supported(struct device *dev, u64 mask)
 {
-#ifdef CONFIG_PCI
-	if (dev->bus == &pci_bus_type)
+	if (dev_is_pci(dev))
 		return 1;
-#endif
+
 	return 0;
 }
 EXPORT_SYMBOL(dma_supported);
-- 
cgit v1.2.3


From ce2521bf7d366a13e2ab3f9e1ff2084b145f4605 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <tkhai@yandex.ru>
Date: Thu, 12 Dec 2013 18:09:50 +0400
Subject: sparc64: smp_callin: Enable irqs after preemption is disabled

Most of other architectures have below suggested order.
So lets do the same to fit generic idle loop scheme better.

Signed-off-by: Kirill Tkhai <tkhai@yandex.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/smp_64.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index b66a5338231e..b085311dcd0e 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -123,11 +123,12 @@ void smp_callin(void)
 		rmb();
 
 	set_cpu_online(cpuid, true);
-	local_irq_enable();
 
 	/* idle thread is expected to have preempt disabled */
 	preempt_disable();
 
+	local_irq_enable();
+
 	cpu_startup_entry(CPUHP_ONLINE);
 }
 
-- 
cgit v1.2.3


From 16932237f2978a2265662f8de4af743b1f55a209 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <dave.kleikamp@oracle.com>
Date: Mon, 16 Dec 2013 15:01:00 -0600
Subject: Revert "sparc64: Fix __copy_{to,from}_user_inatomic defines."

This reverts commit 145e1c0023585e0e8f6df22316308ec61c5066b2.

This commit broke the behavior of __copy_from_user_inatomic when
it is only partially successful. Instead of returning the number
of bytes not copied, it now returns 1. This translates to the
wrong value being returned by iov_iter_copy_from_user_atomic.

xfstests generic/246 and LTP writev01 both fail on btrfs and nfs
because of this.

Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: sparclinux@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/include/asm/uaccess_64.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h
index e562d3caee57..ad7e178337f1 100644
--- a/arch/sparc/include/asm/uaccess_64.h
+++ b/arch/sparc/include/asm/uaccess_64.h
@@ -262,8 +262,8 @@ extern unsigned long __must_check __clear_user(void __user *, unsigned long);
 extern __must_check long strlen_user(const char __user *str);
 extern __must_check long strnlen_user(const char __user *str, long n);
 
-#define __copy_to_user_inatomic ___copy_to_user
-#define __copy_from_user_inatomic ___copy_from_user
+#define __copy_to_user_inatomic __copy_to_user
+#define __copy_from_user_inatomic __copy_from_user
 
 struct pt_regs;
 extern unsigned long compute_effective_address(struct pt_regs *,
-- 
cgit v1.2.3


From d2eca20d77d9d42f3163a0a3d6ead75ee3635f99 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Thu, 2 Jan 2014 17:14:45 +0000
Subject: CRYPTO: Fix more AES build errors

Building a multi-arch kernel results in:

arch/arm/crypto/built-in.o: In function `aesbs_xts_decrypt':
sha1_glue.c:(.text+0x15c8): undefined reference to `bsaes_xts_decrypt'
arch/arm/crypto/built-in.o: In function `aesbs_xts_encrypt':
sha1_glue.c:(.text+0x1664): undefined reference to `bsaes_xts_encrypt'
arch/arm/crypto/built-in.o: In function `aesbs_ctr_encrypt':
sha1_glue.c:(.text+0x184c): undefined reference to `bsaes_ctr32_encrypt_blocks'
arch/arm/crypto/built-in.o: In function `aesbs_cbc_decrypt':
sha1_glue.c:(.text+0x19b4): undefined reference to `bsaes_cbc_encrypt'

This code is already runtime-conditional on NEON being supported, so
there's no point compiling it out depending on the minimum build
architecture.

Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/crypto/aesbs-core.S_shipped | 2 +-
 arch/arm/crypto/bsaes-armv7.pl       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/crypto/aesbs-core.S_shipped b/arch/arm/crypto/aesbs-core.S_shipped
index 64205d453260..71e5fc7cfb18 100644
--- a/arch/arm/crypto/aesbs-core.S_shipped
+++ b/arch/arm/crypto/aesbs-core.S_shipped
@@ -58,7 +58,7 @@
 # define VFP_ABI_FRAME	0
 # define BSAES_ASM_EXTENDED_KEY
 # define XTS_CHAIN_TWEAK
-# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_ARCH__	7
 #endif
 
 #ifdef __thumb__
diff --git a/arch/arm/crypto/bsaes-armv7.pl b/arch/arm/crypto/bsaes-armv7.pl
index f3d96d932573..be068db960ee 100644
--- a/arch/arm/crypto/bsaes-armv7.pl
+++ b/arch/arm/crypto/bsaes-armv7.pl
@@ -701,7 +701,7 @@ $code.=<<___;
 # define VFP_ABI_FRAME	0
 # define BSAES_ASM_EXTENDED_KEY
 # define XTS_CHAIN_TWEAK
-# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_ARCH__	7
 #endif
 
 #ifdef __thumb__
-- 
cgit v1.2.3


From 29c350bf28da333e41e30497b649fe335712a2ab Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 3 Jan 2014 15:01:39 +0000
Subject: ARM: fix "bad mode in ... handler" message for undefined instructions

The array was missing the final entry for the undefined instruction
exception handler; this commit adds it.

Cc: <stable@vger.kernel.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/traps.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 7940241f0576..6eda3bf85c52 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -36,7 +36,13 @@
 #include <asm/system_misc.h>
 #include <asm/opcodes.h>
 
-static const char *handler[]= { "prefetch abort", "data abort", "address exception", "interrupt" };
+static const char *handler[]= {
+	"prefetch abort",
+	"data abort",
+	"address exception",
+	"interrupt",
+	"undefined instruction",
+};
 
 void *vectors_page;
 
-- 
cgit v1.2.3


From 0a5ccc86507f45b80831dac1049197c4d45be955 Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Fri, 3 Jan 2014 16:17:44 +0100
Subject: ARM: 7933/1: rename ioremap_cached to ioremap_cache

ioremap_cache is more aligned with other architectures.
There are only 2 users of this in the kernel: pxa2xx-flash and Xen.

This fixes Xen build failures on arm64:

drivers/tty/hvc/hvc_xen.c:233:2: error: implicit declaration of function 'ioremap_cached' [-Werror=implicit-function-declaration]
drivers/xen/grant-table.c:1174:3: error: implicit declaration of function 'ioremap_cached' [-Werror=implicit-function-declaration]
drivers/xen/xenbus/xenbus_probe.c:778:4: error: implicit declaration of function 'ioremap_cached' [-Werror=implicit-function-declaration]

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/io.h       | 2 +-
 arch/arm/include/asm/xen/page.h | 2 +-
 drivers/mtd/maps/pxa2xx-flash.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 3c597c222ef2..fbeb39c869e9 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -329,7 +329,7 @@ extern void _memset_io(volatile void __iomem *, int, size_t);
  */
 #define ioremap(cookie,size)		__arm_ioremap((cookie), (size), MT_DEVICE)
 #define ioremap_nocache(cookie,size)	__arm_ioremap((cookie), (size), MT_DEVICE)
-#define ioremap_cached(cookie,size)	__arm_ioremap((cookie), (size), MT_DEVICE_CACHED)
+#define ioremap_cache(cookie,size)	__arm_ioremap((cookie), (size), MT_DEVICE_CACHED)
 #define ioremap_wc(cookie,size)		__arm_ioremap((cookie), (size), MT_DEVICE_WC)
 #define iounmap				__arm_iounmap
 
diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h
index 75579a9d6f76..3759cacdd7f8 100644
--- a/arch/arm/include/asm/xen/page.h
+++ b/arch/arm/include/asm/xen/page.h
@@ -117,6 +117,6 @@ static inline bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 	return __set_phys_to_machine(pfn, mfn);
 }
 
-#define xen_remap(cookie, size) ioremap_cached((cookie), (size));
+#define xen_remap(cookie, size) ioremap_cache((cookie), (size));
 
 #endif /* _ASM_ARM_XEN_PAGE_H */
diff --git a/drivers/mtd/maps/pxa2xx-flash.c b/drivers/mtd/maps/pxa2xx-flash.c
index d210d131fef2..0f55589a56b8 100644
--- a/drivers/mtd/maps/pxa2xx-flash.c
+++ b/drivers/mtd/maps/pxa2xx-flash.c
@@ -73,7 +73,7 @@ static int pxa2xx_flash_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	}
 	info->map.cached =
-		ioremap_cached(info->map.phys, info->map.size);
+		ioremap_cache(info->map.phys, info->map.size);
 	if (!info->map.cached)
 		printk(KERN_WARNING "Failed to ioremap cached %s\n",
 		       info->map.name);
-- 
cgit v1.2.3


From e9a1d0165bbda4398d4a5d71736cf6390fb42c6f Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@linux-m68k.org>
Date: Fri, 3 Jan 2014 16:43:51 +0100
Subject: metag/smp: Make boot_secondary() static

boot_secondary() is not used outside arch/metag/kernel/smp.c, hence make it
static.

Signed-off-by: Geert Uytterhoeven <geert+renesas@linux-m68k.org>
Signed-off-by: James Hogan <james.hogan@imgtec.com>
---
 arch/metag/kernel/smp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c
index f74008b6da07..f006d2276f40 100644
--- a/arch/metag/kernel/smp.c
+++ b/arch/metag/kernel/smp.c
@@ -68,7 +68,7 @@ static DECLARE_COMPLETION(cpu_running);
 /*
  * "thread" is assumed to be a valid Meta hardware thread ID.
  */
-int boot_secondary(unsigned int thread, struct task_struct *idle)
+static int boot_secondary(unsigned int thread, struct task_struct *idle)
 {
 	u32 val;
 
-- 
cgit v1.2.3


From 663b55b9b39fa9c848cca273ca4e12bf29b32c71 Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Mon, 6 Jan 2014 19:20:26 -0500
Subject: x86: Delete non-required instances of include <linux/init.h>

None of these files are actually using any __init type directives
and hence don't need to include <linux/init.h>.  Most are just a
left over from __devinit and __cpuinit removal, or simply due to
code getting copied from one driver to the next.

[ hpa: undid incorrect removal from arch/x86/kernel/head_32.S ]

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Link: http://lkml.kernel.org/r/1389054026-12947-1-git-send-email-paul.gortmaker@windriver.com
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/mce.h                           | 1 -
 arch/x86/include/asm/mpspec.h                        | 1 -
 arch/x86/include/asm/processor.h                     | 1 -
 arch/x86/include/asm/ptrace.h                        | 1 -
 arch/x86/include/asm/smp.h                           | 1 -
 arch/x86/include/asm/timer.h                         | 1 -
 arch/x86/kernel/apic/apic_flat_64.c                  | 1 -
 arch/x86/kernel/apic/apic_noop.c                     | 1 -
 arch/x86/kernel/apic/ipi.c                           | 1 -
 arch/x86/kernel/apic/summit_32.c                     | 1 -
 arch/x86/kernel/apic/x2apic_cluster.c                | 1 -
 arch/x86/kernel/apic/x2apic_phys.c                   | 1 -
 arch/x86/kernel/cpu/amd.c                            | 1 -
 arch/x86/kernel/cpu/centaur.c                        | 1 -
 arch/x86/kernel/cpu/cyrix.c                          | 1 -
 arch/x86/kernel/cpu/intel.c                          | 1 -
 arch/x86/kernel/cpu/mcheck/mce_intel.c               | 1 -
 arch/x86/kernel/cpu/mcheck/p5.c                      | 1 -
 arch/x86/kernel/cpu/mcheck/winchip.c                 | 1 -
 arch/x86/kernel/cpu/transmeta.c                      | 1 -
 arch/x86/kernel/cpu/umc.c                            | 1 -
 arch/x86/kernel/crash.c                              | 1 -
 arch/x86/kernel/doublefault.c                        | 1 -
 arch/x86/kernel/hw_breakpoint.c                      | 1 -
 arch/x86/kernel/kgdb.c                               | 1 -
 arch/x86/kernel/machine_kexec_32.c                   | 1 -
 arch/x86/kernel/pci-nommu.c                          | 1 -
 arch/x86/kernel/process_32.c                         | 1 -
 arch/x86/kernel/tsc_sync.c                           | 1 -
 arch/x86/lib/delay.c                                 | 1 -
 arch/x86/mm/kmmio.c                                  | 1 -
 arch/x86/mm/pageattr-test.c                          | 1 -
 arch/x86/pci/fixup.c                                 | 1 -
 arch/x86/platform/intel-mid/early_printk_intel_mid.c | 1 -
 arch/x86/platform/iris/iris.c                        | 1 -
 arch/x86/realmode/rm/reboot.S                        | 1 -
 arch/x86/realmode/rm/trampoline_32.S                 | 1 -
 arch/x86/realmode/rm/trampoline_64.S                 | 1 -
 arch/x86/um/vdso/vdso.S                              | 1 -
 arch/x86/vdso/vdso.S                                 | 1 -
 arch/x86/vdso/vdsox32.S                              | 1 -
 41 files changed, 41 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index c696a8687567..6e4ce2df87cf 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -118,7 +118,6 @@ extern void mce_register_decode_chain(struct notifier_block *nb);
 extern void mce_unregister_decode_chain(struct notifier_block *nb);
 
 #include <linux/percpu.h>
-#include <linux/init.h>
 #include <linux/atomic.h>
 
 extern int mce_p5_enabled;
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 3142a94c7b4b..3e6b4920ef5d 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -1,7 +1,6 @@
 #ifndef _ASM_X86_MPSPEC_H
 #define _ASM_X86_MPSPEC_H
 
-#include <linux/init.h>
 
 #include <asm/mpspec_def.h>
 #include <asm/x86_init.h>
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 7b034a4057f9..8ade61721ffb 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -27,7 +27,6 @@ struct mm_struct;
 #include <linux/cache.h>
 #include <linux/threads.h>
 #include <linux/math64.h>
-#include <linux/init.h>
 #include <linux/err.h>
 #include <linux/irqflags.h>
 
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 942a08623a1a..14fd6fd75a19 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -60,7 +60,6 @@ struct pt_regs {
 
 #endif /* !__i386__ */
 
-#include <linux/init.h>
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt_types.h>
 #endif
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 4137890e88e3..8cd27e08e23c 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -2,7 +2,6 @@
 #define _ASM_X86_SMP_H
 #ifndef __ASSEMBLY__
 #include <linux/cpumask.h>
-#include <linux/init.h>
 #include <asm/percpu.h>
 
 /*
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 34baa0eb5d0c..a6f3e776d2e4 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -1,6 +1,5 @@
 #ifndef _ASM_X86_TIMER_H
 #define _ASM_X86_TIMER_H
-#include <linux/init.h>
 #include <linux/pm.h>
 #include <linux/percpu.h>
 #include <linux/interrupt.h>
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 00c77cf78e9e..5d5b9eb2b7a4 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -14,7 +14,6 @@
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <linux/ctype.h>
-#include <linux/init.h>
 #include <linux/hardirq.h>
 #include <linux/module.h>
 #include <asm/smp.h>
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index e145f28b4099..191ce75c0e54 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -15,7 +15,6 @@
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <linux/ctype.h>
-#include <linux/init.h>
 #include <linux/errno.h>
 #include <asm/fixmap.h>
 #include <asm/mpspec.h>
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 7434d8556d09..62071569bd50 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -1,6 +1,5 @@
 #include <linux/cpumask.h>
 #include <linux/interrupt.h>
-#include <linux/init.h>
 
 #include <linux/mm.h>
 #include <linux/delay.h>
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 77c95c0e1bf7..00146f9b0254 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -29,7 +29,6 @@
 #define pr_fmt(fmt) "summit: %s: " fmt, __func__
 
 #include <linux/mm.h>
-#include <linux/init.h>
 #include <asm/io.h>
 #include <asm/bios_ebda.h>
 
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 140e29db478d..cac85ee6913f 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -3,7 +3,6 @@
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <linux/ctype.h>
-#include <linux/init.h>
 #include <linux/dmar.h>
 #include <linux/cpu.h>
 
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 562a76d433c8..de231e328cae 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -3,7 +3,6 @@
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <linux/ctype.h>
-#include <linux/init.h>
 #include <linux/dmar.h>
 
 #include <asm/smp.h>
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index bca023bdd6b2..39bc78dad377 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1,5 +1,4 @@
 #include <linux/export.h>
-#include <linux/init.h>
 #include <linux/bitops.h>
 #include <linux/elf.h>
 #include <linux/mm.h>
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index 8d5652dc99dd..8779edab684e 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -1,6 +1,5 @@
 #include <linux/bitops.h>
 #include <linux/kernel.h>
-#include <linux/init.h>
 
 #include <asm/processor.h>
 #include <asm/e820.h>
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index d0969c75ab54..aaf152e79637 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -1,4 +1,3 @@
-#include <linux/init.h>
 #include <linux/bitops.h>
 #include <linux/delay.h>
 #include <linux/pci.h>
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index dc1ec0dff939..53f5d3c7ac09 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -1,4 +1,3 @@
-#include <linux/init.h>
 #include <linux/kernel.h>
 
 #include <linux/string.h>
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 4cfe0458ca66..fb6156fee6f7 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -6,7 +6,6 @@
  */
 
 #include <linux/gfp.h>
-#include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/percpu.h>
 #include <linux/sched.h>
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index 1c044b1ccc59..a3042989398c 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -5,7 +5,6 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
-#include <linux/init.h>
 #include <linux/smp.h>
 
 #include <asm/processor.h>
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index e9a701aecaa1..7dc5564d0cdf 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -5,7 +5,6 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
-#include <linux/init.h>
 
 #include <asm/processor.h>
 #include <asm/mce.h>
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index aa0430d69b90..3fa0e5ad86b4 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -1,6 +1,5 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
-#include <linux/init.h>
 #include <asm/processor.h>
 #include <asm/msr.h>
 #include "cpu.h"
diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c
index 75c5ad5d35cc..ef9c2a0078bd 100644
--- a/arch/x86/kernel/cpu/umc.c
+++ b/arch/x86/kernel/cpu/umc.c
@@ -1,5 +1,4 @@
 #include <linux/kernel.h>
-#include <linux/init.h>
 #include <asm/processor.h>
 #include "cpu.h"
 
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 18677a90d6a3..a57902efe2d5 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -7,7 +7,6 @@
  *
  */
 
-#include <linux/init.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/smp.h>
diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c
index 5d3fe8d36e4a..f6dfd9334b67 100644
--- a/arch/x86/kernel/doublefault.c
+++ b/arch/x86/kernel/doublefault.c
@@ -1,6 +1,5 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
-#include <linux/init.h>
 #include <linux/init_task.h>
 #include <linux/fs.h>
 
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index f66ff162dce8..a67b47c31314 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -38,7 +38,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/sched.h>
-#include <linux/init.h>
 #include <linux/smp.h>
 
 #include <asm/hw_breakpoint.h>
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 836f8322960e..7ec1d5f8d283 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -39,7 +39,6 @@
 #include <linux/sched.h>
 #include <linux/delay.h>
 #include <linux/kgdb.h>
-#include <linux/init.h>
 #include <linux/smp.h>
 #include <linux/nmi.h>
 #include <linux/hw_breakpoint.h>
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 5b19e4d78b00..1667b1de8d5d 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -9,7 +9,6 @@
 #include <linux/mm.h>
 #include <linux/kexec.h>
 #include <linux/delay.h>
-#include <linux/init.h>
 #include <linux/numa.h>
 #include <linux/ftrace.h>
 #include <linux/suspend.h>
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index 871be4a84c7d..da15918d1c81 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -3,7 +3,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/scatterlist.h>
 #include <linux/string.h>
-#include <linux/init.h>
 #include <linux/gfp.h>
 #include <linux/pci.h>
 #include <linux/mm.h>
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 6f1236c29c4b..0de43e98ce08 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -24,7 +24,6 @@
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <linux/reboot.h>
-#include <linux/init.h>
 #include <linux/mc146818rtc.h>
 #include <linux/module.h>
 #include <linux/kallsyms.h>
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index adfdf56a3714..26488487bc61 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -16,7 +16,6 @@
  */
 #include <linux/spinlock.h>
 #include <linux/kernel.h>
-#include <linux/init.h>
 #include <linux/smp.h>
 #include <linux/nmi.h>
 #include <asm/tsc.h>
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index 7c3bee636e2f..39d6a3db0b96 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -16,7 +16,6 @@
 #include <linux/timex.h>
 #include <linux/preempt.h>
 #include <linux/delay.h>
-#include <linux/init.h>
 
 #include <asm/processor.h>
 #include <asm/delay.h>
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index e5d5e2ce9f77..637ab34ed632 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -11,7 +11,6 @@
 #include <linux/rculist.h>
 #include <linux/spinlock.h>
 #include <linux/hash.h>
-#include <linux/init.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/uaccess.h>
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index d0b1773d9d2e..461bc8289024 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -8,7 +8,6 @@
 #include <linux/kthread.h>
 #include <linux/random.h>
 #include <linux/kernel.h>
-#include <linux/init.h>
 #include <linux/mm.h>
 
 #include <asm/cacheflush.h>
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index b046e070e088..bca9e85daaa5 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -5,7 +5,6 @@
 #include <linux/delay.h>
 #include <linux/dmi.h>
 #include <linux/pci.h>
-#include <linux/init.h>
 #include <linux/vgaarb.h>
 #include <asm/pci_x86.h>
 
diff --git a/arch/x86/platform/intel-mid/early_printk_intel_mid.c b/arch/x86/platform/intel-mid/early_printk_intel_mid.c
index 4f702f554f6e..e0bd082a80e0 100644
--- a/arch/x86/platform/intel-mid/early_printk_intel_mid.c
+++ b/arch/x86/platform/intel-mid/early_printk_intel_mid.c
@@ -22,7 +22,6 @@
 #include <linux/console.h>
 #include <linux/kernel.h>
 #include <linux/delay.h>
-#include <linux/init.h>
 #include <linux/io.h>
 
 #include <asm/fixmap.h>
diff --git a/arch/x86/platform/iris/iris.c b/arch/x86/platform/iris/iris.c
index e6cb80f620af..4d171e8640ef 100644
--- a/arch/x86/platform/iris/iris.c
+++ b/arch/x86/platform/iris/iris.c
@@ -27,7 +27,6 @@
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/delay.h>
-#include <linux/init.h>
 #include <linux/pm.h>
 #include <asm/io.h>
 
diff --git a/arch/x86/realmode/rm/reboot.S b/arch/x86/realmode/rm/reboot.S
index f932ea61d1c8..d66c607bdc58 100644
--- a/arch/x86/realmode/rm/reboot.S
+++ b/arch/x86/realmode/rm/reboot.S
@@ -1,5 +1,4 @@
 #include <linux/linkage.h>
-#include <linux/init.h>
 #include <asm/segment.h>
 #include <asm/page_types.h>
 #include <asm/processor-flags.h>
diff --git a/arch/x86/realmode/rm/trampoline_32.S b/arch/x86/realmode/rm/trampoline_32.S
index c1b2791183e7..48ddd76bc4c3 100644
--- a/arch/x86/realmode/rm/trampoline_32.S
+++ b/arch/x86/realmode/rm/trampoline_32.S
@@ -20,7 +20,6 @@
  */
 
 #include <linux/linkage.h>
-#include <linux/init.h>
 #include <asm/segment.h>
 #include <asm/page_types.h>
 #include "realmode.h"
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
index bb360dc39d21..dac7b20d2f9d 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -25,7 +25,6 @@
  */
 
 #include <linux/linkage.h>
-#include <linux/init.h>
 #include <asm/pgtable_types.h>
 #include <asm/page_types.h>
 #include <asm/msr.h>
diff --git a/arch/x86/um/vdso/vdso.S b/arch/x86/um/vdso/vdso.S
index 1cb468adacbb..4b4bd4cc06ab 100644
--- a/arch/x86/um/vdso/vdso.S
+++ b/arch/x86/um/vdso/vdso.S
@@ -1,4 +1,3 @@
-#include <linux/init.h>
 
 __INITDATA
 
diff --git a/arch/x86/vdso/vdso.S b/arch/x86/vdso/vdso.S
index 01f5e3b4613c..1e13eb8c9656 100644
--- a/arch/x86/vdso/vdso.S
+++ b/arch/x86/vdso/vdso.S
@@ -1,6 +1,5 @@
 #include <asm/page_types.h>
 #include <linux/linkage.h>
-#include <linux/init.h>
 
 __PAGE_ALIGNED_DATA
 
diff --git a/arch/x86/vdso/vdsox32.S b/arch/x86/vdso/vdsox32.S
index d6b9a7f42a8a..295f1c7543d8 100644
--- a/arch/x86/vdso/vdsox32.S
+++ b/arch/x86/vdso/vdsox32.S
@@ -1,6 +1,5 @@
 #include <asm/page_types.h>
 #include <linux/linkage.h>
-#include <linux/init.h>
 
 __PAGE_ALIGNED_DATA
 
-- 
cgit v1.2.3


From 6bcac805bacb3b637911244d95368512ae389abc Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Tue, 7 Jan 2014 17:53:54 +0000
Subject: Revert "ARM: 7908/1: mm: Fix the arm_dma_limit calculation"

This reverts commit 787b0d5c1ca7ff24feb6f92e4c7f4410ee7d81a8 since
it is no longer required after 7909/1 was applied, and it causes
build regressions when ARM_PATCH_PHYS_VIRT is disabled and DMA_ZONE
is enabled.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 1f7b19a47060..3e8f106ee5fe 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -229,7 +229,7 @@ void __init setup_dma_zone(const struct machine_desc *mdesc)
 #ifdef CONFIG_ZONE_DMA
 	if (mdesc->dma_zone_size) {
 		arm_dma_zone_size = mdesc->dma_zone_size;
-		arm_dma_limit = __pv_phys_offset + arm_dma_zone_size - 1;
+		arm_dma_limit = PHYS_OFFSET + arm_dma_zone_size - 1;
 	} else
 		arm_dma_limit = 0xffffffff;
 	arm_dma_pfn_limit = arm_dma_limit >> PAGE_SHIFT;
-- 
cgit v1.2.3


From f8dae00684d678afa13041ef170cecfd1297ed40 Mon Sep 17 00:00:00 2001
From: John David Anglin <dave.anglin@bell.net>
Date: Sun, 5 Jan 2014 21:25:00 -0500
Subject: parisc: Ensure full cache coherency for kmap/kunmap

Helge Deller noted a few weeks ago problems with the AIO support on
parisc. This change is the result of numerous iterations on how best to
deal with this problem.

The solution adopted here is to provide full cache coherency in a
uniform manner on all parisc systems. This involves calling
flush_dcache_page() on kmap operations and flush_kernel_dcache_page() on
kunmap operations. As a result, the copy_user_page() and
clear_user_page() functions can be removed and the overall code is
simpler.

The change ensures that both userspace and kernel aliases to a mapped
page are invalidated and flushed. This is necessary for the correct
operation of PA8800 and PA8900 based systems which do not support
inequivalent aliases.

With this change, I have observed no cache related issues on c8000 and
rp3440. It is now possible for example to do kernel builds with "-j64"
on four way systems.

On systems using XFS file systems, the patch recently posted by Mikulas
Patocka to "fix crash using XFS on loopback" is needed to avoid a hang
caused by an uninitialized lock passed to flush_dcache_page() in the
page struct.

Signed-off-by: John David Anglin <dave.anglin@bell.net>
Cc: stable@vger.kernel.org # v3.9+
Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/include/asm/cacheflush.h | 12 ++++--------
 arch/parisc/include/asm/page.h       |  5 ++---
 arch/parisc/kernel/cache.c           | 35 -----------------------------------
 3 files changed, 6 insertions(+), 46 deletions(-)

(limited to 'arch')

diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index f0e2784e7cca..2f9b751878ba 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -125,42 +125,38 @@ flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vma
 void mark_rodata_ro(void);
 #endif
 
-#ifdef CONFIG_PA8X00
-/* Only pa8800, pa8900 needs this */
-
 #include <asm/kmap_types.h>
 
 #define ARCH_HAS_KMAP
 
-void kunmap_parisc(void *addr);
-
 static inline void *kmap(struct page *page)
 {
 	might_sleep();
+	flush_dcache_page(page);
 	return page_address(page);
 }
 
 static inline void kunmap(struct page *page)
 {
-	kunmap_parisc(page_address(page));
+	flush_kernel_dcache_page_addr(page_address(page));
 }
 
 static inline void *kmap_atomic(struct page *page)
 {
 	pagefault_disable();
+	flush_dcache_page(page);
 	return page_address(page);
 }
 
 static inline void __kunmap_atomic(void *addr)
 {
-	kunmap_parisc(addr);
+	flush_kernel_dcache_page_addr(addr);
 	pagefault_enable();
 }
 
 #define kmap_atomic_prot(page, prot)	kmap_atomic(page)
 #define kmap_atomic_pfn(pfn)	kmap_atomic(pfn_to_page(pfn))
 #define kmap_atomic_to_page(ptr)	virt_to_page(ptr)
-#endif
 
 #endif /* _PARISC_CACHEFLUSH_H */
 
diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h
index b7adb2ac049c..c53fc63149e8 100644
--- a/arch/parisc/include/asm/page.h
+++ b/arch/parisc/include/asm/page.h
@@ -28,9 +28,8 @@ struct page;
 
 void clear_page_asm(void *page);
 void copy_page_asm(void *to, void *from);
-void clear_user_page(void *vto, unsigned long vaddr, struct page *pg);
-void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
-			   struct page *pg);
+#define clear_user_page(vto, vaddr, page) clear_page_asm(vto)
+#define copy_user_page(vto, vfrom, vaddr, page) copy_page_asm(vto, vfrom)
 
 /* #define CONFIG_PARISC_TMPALIAS */
 
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index c035673209f7..a72545554a31 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -388,41 +388,6 @@ void flush_kernel_dcache_page_addr(void *addr)
 }
 EXPORT_SYMBOL(flush_kernel_dcache_page_addr);
 
-void clear_user_page(void *vto, unsigned long vaddr, struct page *page)
-{
-	clear_page_asm(vto);
-	if (!parisc_requires_coherency())
-		flush_kernel_dcache_page_asm(vto);
-}
-EXPORT_SYMBOL(clear_user_page);
-
-void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
-	struct page *pg)
-{
-	/* Copy using kernel mapping.  No coherency is needed
-	   (all in kmap/kunmap) on machines that don't support
-	   non-equivalent aliasing.  However, the `from' page
-	   needs to be flushed before it can be accessed through
-	   the kernel mapping. */
-	preempt_disable();
-	flush_dcache_page_asm(__pa(vfrom), vaddr);
-	preempt_enable();
-	copy_page_asm(vto, vfrom);
-	if (!parisc_requires_coherency())
-		flush_kernel_dcache_page_asm(vto);
-}
-EXPORT_SYMBOL(copy_user_page);
-
-#ifdef CONFIG_PA8X00
-
-void kunmap_parisc(void *addr)
-{
-	if (parisc_requires_coherency())
-		flush_kernel_dcache_page_addr(addr);
-}
-EXPORT_SYMBOL(kunmap_parisc);
-#endif
-
 void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
 {
 	unsigned long flags;
-- 
cgit v1.2.3


From 46184415368a6095d5da33991c5e011f1084353d Mon Sep 17 00:00:00 2001
From: "David E. Box" <david.e.box@linux.intel.com>
Date: Wed, 8 Jan 2014 13:27:51 -0800
Subject: arch: x86: New MailBox support driver for Intel SOC's

Current Intel SOC cores use a MailBox Interface (MBI) to provide access to
configuration registers on devices (called units) connected to the system
fabric. This is a support driver that implements access to this interface on
those platforms that can enumerate the device using PCI. Initial support is for
BayTrail, for which port definitons are provided. This is a requirement for
implementing platform specific features (e.g. RAPL driver requires this to
perform platform specific power management using the registers in PUNIT).
Dependant modules should select IOSF_MBI in their respective Kconfig
configuraiton. Serialized access is handled by all exported routines with
spinlocks.

The API includes 3 functions for access to unit registers:

int iosf_mbi_read(u8 port, u8 opcode, u32 offset, u32 *mdr)
int iosf_mbi_write(u8 port, u8 opcode, u32 offset, u32 mdr)
int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask)

port:	indicating the unit being accessed
opcode:	the read or write port specific opcode
offset:	the register offset within the port
mdr:	the register data to be read, written, or modified
mask:	bit locations in mdr to change

Returns nonzero on error

Note: GPU code handles access to the GFX unit. Therefore access to that unit
with this driver is disallowed to avoid conflicts.

Signed-off-by: David E. Box <david.e.box@linux.intel.com>
Link: http://lkml.kernel.org/r/1389216471-734-1-git-send-email-david.e.box@linux.intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: Matthew Garrett <mjg59@srcf.ucam.org>
---
 arch/x86/Kconfig                |   8 ++
 arch/x86/include/asm/iosf_mbi.h |  90 ++++++++++++++++
 arch/x86/kernel/Makefile        |   1 +
 arch/x86/kernel/iosf_mbi.c      | 226 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 325 insertions(+)
 create mode 100644 arch/x86/include/asm/iosf_mbi.h
 create mode 100644 arch/x86/kernel/iosf_mbi.c

(limited to 'arch')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0952ecd60eca..ca5959a3d37e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2393,6 +2393,14 @@ config X86_DMA_REMAP
 	bool
 	depends on STA2X11
 
+config IOSF_MBI
+	bool
+	depends on PCI
+	---help---
+	  To be selected by modules requiring access to the Intel OnChip System
+	  Fabric (IOSF) Sideband MailBox Interface (MBI). For MBI platforms
+	  enumerable by PCI.
+
 source "net/Kconfig"
 
 source "drivers/Kconfig"
diff --git a/arch/x86/include/asm/iosf_mbi.h b/arch/x86/include/asm/iosf_mbi.h
new file mode 100644
index 000000000000..8e71c7941767
--- /dev/null
+++ b/arch/x86/include/asm/iosf_mbi.h
@@ -0,0 +1,90 @@
+/*
+ * iosf_mbi.h: Intel OnChip System Fabric MailBox access support
+ */
+
+#ifndef IOSF_MBI_SYMS_H
+#define IOSF_MBI_SYMS_H
+
+#define MBI_MCR_OFFSET		0xD0
+#define MBI_MDR_OFFSET		0xD4
+#define MBI_MCRX_OFFSET		0xD8
+
+#define MBI_RD_MASK		0xFEFFFFFF
+#define MBI_WR_MASK		0X01000000
+
+#define MBI_MASK_HI		0xFFFFFF00
+#define MBI_MASK_LO		0x000000FF
+#define MBI_ENABLE		0xF0
+
+/* Baytrail available units */
+#define BT_MBI_UNIT_AUNIT	0x00
+#define BT_MBI_UNIT_SMC		0x01
+#define BT_MBI_UNIT_CPU		0x02
+#define BT_MBI_UNIT_BUNIT	0x03
+#define BT_MBI_UNIT_PMC		0x04
+#define BT_MBI_UNIT_GFX		0x06
+#define BT_MBI_UNIT_SMI		0x0C
+#define BT_MBI_UNIT_USB		0x43
+#define BT_MBI_UNIT_SATA	0xA3
+#define BT_MBI_UNIT_PCIE	0xA6
+
+/* Baytrail read/write opcodes */
+#define BT_MBI_AUNIT_READ	0x10
+#define BT_MBI_AUNIT_WRITE	0x11
+#define BT_MBI_SMC_READ		0x10
+#define BT_MBI_SMC_WRITE	0x11
+#define BT_MBI_CPU_READ		0x10
+#define BT_MBI_CPU_WRITE	0x11
+#define BT_MBI_BUNIT_READ	0x10
+#define BT_MBI_BUNIT_WRITE	0x11
+#define BT_MBI_PMC_READ		0x06
+#define BT_MBI_PMC_WRITE	0x07
+#define BT_MBI_GFX_READ		0x00
+#define BT_MBI_GFX_WRITE	0x01
+#define BT_MBI_SMIO_READ	0x06
+#define BT_MBI_SMIO_WRITE	0x07
+#define BT_MBI_USB_READ		0x06
+#define BT_MBI_USB_WRITE	0x07
+#define BT_MBI_SATA_READ	0x00
+#define BT_MBI_SATA_WRITE	0x01
+#define BT_MBI_PCIE_READ	0x00
+#define BT_MBI_PCIE_WRITE	0x01
+
+/**
+ * iosf_mbi_read() - MailBox Interface read command
+ * @port:	port indicating subunit being accessed
+ * @opcode:	port specific read or write opcode
+ * @offset:	register address offset
+ * @mdr:	register data to be read
+ *
+ * Locking is handled by spinlock - cannot sleep.
+ * Return: Nonzero on error
+ */
+int iosf_mbi_read(u8 port, u8 opcode, u32 offset, u32 *mdr);
+
+/**
+ * iosf_mbi_write() - MailBox unmasked write command
+ * @port:	port indicating subunit being accessed
+ * @opcode:	port specific read or write opcode
+ * @offset:	register address offset
+ * @mdr:	register data to be written
+ *
+ * Locking is handled by spinlock - cannot sleep.
+ * Return: Nonzero on error
+ */
+int iosf_mbi_write(u8 port, u8 opcode, u32 offset, u32 mdr);
+
+/**
+ * iosf_mbi_modify() - MailBox masked write command
+ * @port:	port indicating subunit being accessed
+ * @opcode:	port specific read or write opcode
+ * @offset:	register address offset
+ * @mdr:	register data being modified
+ * @mask:	mask indicating bits in mdr to be modified
+ *
+ * Locking is handled by spinlock - cannot sleep.
+ * Return: Nonzero on error
+ */
+int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask);
+
+#endif /* IOSF_MBI_SYMS_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 9b0a34e2cd79..dbe9bd65ab7b 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -111,6 +111,7 @@ obj-$(CONFIG_EFI)			+= sysfb_efi.o
 
 obj-$(CONFIG_PERF_EVENTS)		+= perf_regs.o
 obj-$(CONFIG_TRACING)			+= tracepoint.o
+obj-$(CONFIG_IOSF_MBI)			+= iosf_mbi.o
 
 ###
 # 64 bit specific files
diff --git a/arch/x86/kernel/iosf_mbi.c b/arch/x86/kernel/iosf_mbi.c
new file mode 100644
index 000000000000..c3aae6672843
--- /dev/null
+++ b/arch/x86/kernel/iosf_mbi.c
@@ -0,0 +1,226 @@
+/*
+ * IOSF-SB MailBox Interface Driver
+ * Copyright (c) 2013, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ *
+ * The IOSF-SB is a fabric bus available on Atom based SOC's that uses a
+ * mailbox interface (MBI) to communicate with mutiple devices. This
+ * driver implements access to this interface for those platforms that can
+ * enumerate the device using PCI.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+
+#include <asm/iosf_mbi.h>
+
+static DEFINE_SPINLOCK(iosf_mbi_lock);
+
+static inline u32 iosf_mbi_form_mcr(u8 op, u8 port, u8 offset)
+{
+	return (op << 24) | (port << 16) | (offset << 8) | MBI_ENABLE;
+}
+
+static struct pci_dev *mbi_pdev;	/* one mbi device */
+
+static int iosf_mbi_pci_read_mdr(u32 mcrx, u32 mcr, u32 *mdr)
+{
+	int result;
+
+	if (!mbi_pdev)
+		return -ENODEV;
+
+	if (mcrx) {
+		result = pci_write_config_dword(mbi_pdev, MBI_MCRX_OFFSET,
+						mcrx);
+		if (result < 0)
+			goto fail_read;
+	}
+
+	result = pci_write_config_dword(mbi_pdev, MBI_MCR_OFFSET, mcr);
+	if (result < 0)
+		goto fail_read;
+
+	result = pci_read_config_dword(mbi_pdev, MBI_MDR_OFFSET, mdr);
+	if (result < 0)
+		goto fail_read;
+
+	return 0;
+
+fail_read:
+	dev_err(&mbi_pdev->dev, "PCI config access failed with %d\n", result);
+	return result;
+}
+
+static int iosf_mbi_pci_write_mdr(u32 mcrx, u32 mcr, u32 mdr)
+{
+	int result;
+
+	if (!mbi_pdev)
+		return -ENODEV;
+
+	result = pci_write_config_dword(mbi_pdev, MBI_MDR_OFFSET, mdr);
+	if (result < 0)
+		goto fail_write;
+
+	if (mcrx) {
+		result = pci_write_config_dword(mbi_pdev, MBI_MCRX_OFFSET,
+						mcrx);
+		if (result < 0)
+			goto fail_write;
+	}
+
+	result = pci_write_config_dword(mbi_pdev, MBI_MCR_OFFSET, mcr);
+	if (result < 0)
+		goto fail_write;
+
+	return 0;
+
+fail_write:
+	dev_err(&mbi_pdev->dev, "PCI config access failed with %d\n", result);
+	return result;
+}
+
+int iosf_mbi_read(u8 port, u8 opcode, u32 offset, u32 *mdr)
+{
+	u32 mcr, mcrx;
+	unsigned long flags;
+	int ret;
+
+	/*Access to the GFX unit is handled by GPU code */
+	if (port == BT_MBI_UNIT_GFX) {
+		WARN_ON(1);
+		return -EPERM;
+	}
+
+	mcr = iosf_mbi_form_mcr(opcode, port, offset & MBI_MASK_LO);
+	mcrx = offset & MBI_MASK_HI;
+
+	spin_lock_irqsave(&iosf_mbi_lock, flags);
+	ret = iosf_mbi_pci_read_mdr(mcrx, mcr, mdr);
+	spin_unlock_irqrestore(&iosf_mbi_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(iosf_mbi_read);
+
+int iosf_mbi_write(u8 port, u8 opcode, u32 offset, u32 mdr)
+{
+	u32 mcr, mcrx;
+	unsigned long flags;
+	int ret;
+
+	/*Access to the GFX unit is handled by GPU code */
+	if (port == BT_MBI_UNIT_GFX) {
+		WARN_ON(1);
+		return -EPERM;
+	}
+
+	mcr = iosf_mbi_form_mcr(opcode, port, offset & MBI_MASK_LO);
+	mcrx = offset & MBI_MASK_HI;
+
+	spin_lock_irqsave(&iosf_mbi_lock, flags);
+	ret = iosf_mbi_pci_write_mdr(mcrx, mcr, mdr);
+	spin_unlock_irqrestore(&iosf_mbi_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(iosf_mbi_write);
+
+int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask)
+{
+	u32 mcr, mcrx;
+	u32 value;
+	unsigned long flags;
+	int ret;
+
+	/*Access to the GFX unit is handled by GPU code */
+	if (port == BT_MBI_UNIT_GFX) {
+		WARN_ON(1);
+		return -EPERM;
+	}
+
+	mcr = iosf_mbi_form_mcr(opcode, port, offset & MBI_MASK_LO);
+	mcrx = offset & MBI_MASK_HI;
+
+	spin_lock_irqsave(&iosf_mbi_lock, flags);
+
+	/* Read current mdr value */
+	ret = iosf_mbi_pci_read_mdr(mcrx, mcr & MBI_RD_MASK, &value);
+	if (ret < 0) {
+		spin_unlock_irqrestore(&iosf_mbi_lock, flags);
+		return ret;
+	}
+
+	/* Apply mask */
+	value &= ~mask;
+	mdr &= mask;
+	value |= mdr;
+
+	/* Write back */
+	ret = iosf_mbi_pci_write_mdr(mcrx, mcr | MBI_WR_MASK, value);
+
+	spin_unlock_irqrestore(&iosf_mbi_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(iosf_mbi_modify);
+
+static int iosf_mbi_probe(struct pci_dev *pdev,
+			  const struct pci_device_id *unused)
+{
+	int ret;
+
+	ret = pci_enable_device(pdev);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "error: could not enable device\n");
+		return ret;
+	}
+
+	mbi_pdev = pci_dev_get(pdev);
+	return 0;
+}
+
+static DEFINE_PCI_DEVICE_TABLE(iosf_mbi_pci_ids) = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x0F00) },
+	{ 0, },
+};
+MODULE_DEVICE_TABLE(pci, iosf_mbi_pci_ids);
+
+static struct pci_driver iosf_mbi_pci_driver = {
+	.name		= "iosf_mbi_pci",
+	.probe		= iosf_mbi_probe,
+	.id_table	= iosf_mbi_pci_ids,
+};
+
+static int __init iosf_mbi_init(void)
+{
+	return pci_register_driver(&iosf_mbi_pci_driver);
+}
+
+static void __exit iosf_mbi_exit(void)
+{
+	pci_unregister_driver(&iosf_mbi_pci_driver);
+	if (mbi_pdev) {
+		pci_dev_put(mbi_pdev);
+		mbi_pdev = NULL;
+	}
+}
+
+module_init(iosf_mbi_init);
+module_exit(iosf_mbi_exit);
+
+MODULE_AUTHOR("David E. Box <david.e.box@linux.intel.com>");
+MODULE_DESCRIPTION("IOSF Mailbox Interface accessor");
+MODULE_LICENSE("GPL v2");
-- 
cgit v1.2.3


From 1739f09e33d8f66bf48ddbc3eca615574da6c4f6 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Wed, 13 Nov 2013 15:20:04 -0500
Subject: ftrace/x86: Load ftrace_ops in parameter not the variable holding it

Function tracing callbacks expect to have the ftrace_ops that registered it
passed to them, not the address of the variable that holds the ftrace_ops
that registered it.

Use a mov instead of a lea to store the ftrace_ops into the parameter
of the function tracing callback.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Link: http://lkml.kernel.org/r/20131113152004.459787f9@gandalf.local.home
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: <stable@vger.kernel.org> # v3.8+
---
 arch/x86/kernel/entry_32.S | 4 ++--
 arch/x86/kernel/entry_64.S | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 51e2988c5728..a2a4f4697889 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1082,7 +1082,7 @@ ENTRY(ftrace_caller)
 	pushl $0	/* Pass NULL as regs pointer */
 	movl 4*4(%esp), %eax
 	movl 0x4(%ebp), %edx
-	leal function_trace_op, %ecx
+	movl function_trace_op, %ecx
 	subl $MCOUNT_INSN_SIZE, %eax
 
 .globl ftrace_call
@@ -1140,7 +1140,7 @@ ENTRY(ftrace_regs_caller)
 	movl 12*4(%esp), %eax	/* Load ip (1st parameter) */
 	subl $MCOUNT_INSN_SIZE, %eax	/* Adjust ip */
 	movl 0x4(%ebp), %edx	/* Load parent ip (2nd parameter) */
-	leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
+	movl function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
 	pushl %esp		/* Save pt_regs as 4th parameter */
 
 GLOBAL(ftrace_regs_call)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e21b0785a85b..1e96c3628bf2 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -88,7 +88,7 @@ END(function_hook)
 	MCOUNT_SAVE_FRAME \skip
 
 	/* Load the ftrace_ops into the 3rd parameter */
-	leaq function_trace_op, %rdx
+	movq function_trace_op(%rip), %rdx
 
 	/* Load ip into the first parameter */
 	movq RIP(%rsp), %rdi
-- 
cgit v1.2.3


From e44ef891e9e68b6ce7d3fd3bac73b7d5433050ae Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Wed, 8 Jan 2014 18:24:21 +0100
Subject: ARM: 7934/1: DT/kernel: fix arch_match_cpu_phys_id to avoid erroneous
 match

The MPIDR contains specific bitfields(MPIDR.Aff{2..0}) which uniquely
identify a CPU, in addition to some non-identifying information and
reserved bits. The ARM cpu binding defines the 'reg' property to only
contain the affinity bits, and any cpu nodes with other bits set in
their 'reg' entry are skipped.

As such it is not necessary to mask the phys_id with MPIDR_HWID_BITMASK,
and doing so could lead to matching erroneous CPU nodes in the device
tree. This patch removes the masking of the physical identifier.

Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/devtree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c
index 739c3dfc1da2..34d5fd585bbb 100644
--- a/arch/arm/kernel/devtree.c
+++ b/arch/arm/kernel/devtree.c
@@ -171,7 +171,7 @@ void __init arm_dt_init_cpu_maps(void)
 
 bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
 {
-	return (phys_id & MPIDR_HWID_BITMASK) == cpu_logical_map(cpu);
+	return phys_id == cpu_logical_map(cpu);
 }
 
 static const void * __init arch_get_next_mach(const char *const **match)
-- 
cgit v1.2.3


From 261521f142fb426b58ed460d2476049dc8fa4fb9 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Fri, 10 Jan 2014 00:57:06 +0100
Subject: ARM: 7937/1: perf_event: Silence sparse warning

arch/arm/kernel/perf_event_cpu.c:274:25: warning: incorrect type in assignment (different modifiers)
arch/arm/kernel/perf_event_cpu.c:274:25: expected int ( *init_fn )( ... )
arch/arm/kernel/perf_event_cpu.c:274:25: got void const *const data

Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/perf_event_cpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index d85055cd24ba..20d553c9f5e2 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -254,7 +254,7 @@ static int probe_current_pmu(struct arm_pmu *pmu)
 static int cpu_pmu_device_probe(struct platform_device *pdev)
 {
 	const struct of_device_id *of_id;
-	int (*init_fn)(struct arm_pmu *);
+	const int (*init_fn)(struct arm_pmu *);
 	struct device_node *node = pdev->dev.of_node;
 	struct arm_pmu *pmu;
 	int ret = -ENODEV;
-- 
cgit v1.2.3


From d6cd989477e2fee29ccda257614ef7b2621d0601 Mon Sep 17 00:00:00 2001
From: Taras Kondratiuk <taras.kondratiuk@linaro.org>
Date: Fri, 10 Jan 2014 01:36:00 +0100
Subject: ARM: 7939/1: traps: fix opcode endianness when read from user memory

Currently code has an inverted logic: opcode from user memory
is swapped to a proper endianness only in case of read error.
While normally opcode should be swapped only if it was read
correctly from user memory.

Reviewed-by: Victor Kamensky <victor.kamensky@linaro.org>
Signed-off-by: Ben Dooks <ben.dooks@codethink.co.uk>
Signed-off-by: Taras Kondratiuk <taras.kondratiuk@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/traps.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 6eda3bf85c52..4636d56af2db 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -431,9 +431,10 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
 			instr2 = __mem_to_opcode_thumb16(instr2);
 			instr = __opcode_thumb32_compose(instr, instr2);
 		}
-	} else if (get_user(instr, (u32 __user *)pc)) {
+	} else {
+		if (get_user(instr, (u32 __user *)pc))
+			goto die_sig;
 		instr = __mem_to_opcode_arm(instr);
-		goto die_sig;
 	}
 
 	if (call_undef_hook(regs, instr) == 0)
-- 
cgit v1.2.3


From 26bef1318adc1b3a530ecc807ef99346db2aa8b0 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 11 Jan 2014 19:15:52 -0800
Subject: x86, fpu, amd: Clear exceptions in AMD FXSAVE workaround

Before we do an EMMS in the AMD FXSAVE information leak workaround we
need to clear any pending exceptions, otherwise we trap with a
floating-point exception inside this code.

Reported-by: halfdog <me@halfdog.net>
Tested-by: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/CA%2B55aFxQnY_PCG_n4=0w-VG=YLXL-yr7oMxyy0WU2gCBAf3ydg@mail.gmail.com
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/fpu-internal.h | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index c49a613c6452..cea1c76d49bf 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -293,12 +293,13 @@ static inline int restore_fpu_checking(struct task_struct *tsk)
 	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
 	   is pending.  Clear the x87 state here by setting it to fixed
 	   values. "m" is a random variable that should be in L1 */
-	alternative_input(
-		ASM_NOP8 ASM_NOP2,
-		"emms\n\t"		/* clear stack tags */
-		"fildl %P[addr]",	/* set F?P to defined value */
-		X86_FEATURE_FXSAVE_LEAK,
-		[addr] "m" (tsk->thread.fpu.has_fpu));
+	if (unlikely(static_cpu_has(X86_FEATURE_FXSAVE_LEAK))) {
+		asm volatile(
+			"fnclex\n\t"
+			"emms\n\t"
+			"fildl %P[addr]"	/* set F?P to defined value */
+			: : [addr] "m" (tsk->thread.fpu.has_fpu));
+	}
 
 	return fpu_restore_checking(&tsk->thread.fpu);
 }
-- 
cgit v1.2.3


From 0c3351d451ae2fa438d5d1ed719fc43354fbffbb Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Thu, 2 Jan 2014 15:11:13 -0800
Subject: seqlock: Use raw_ prefix instead of _no_lockdep
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Linus disliked the _no_lockdep() naming, so instead
use the more-consistent raw_* prefix to the non-lockdep
enabled seqcount methods.

This also adds raw_ methods for the write operations
as well, which will be utilized in a following patch.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Krzysztof Hałasa <khalasa@piap.pl>
Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Cc: Willy Tarreau <w@1wt.eu>
Link: http://lkml.kernel.org/r/1388704274-5278-1-git-send-email-john.stultz@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/vdso/vclock_gettime.c |  8 ++++----
 include/linux/seqlock.h        | 27 +++++++++++++++++++--------
 2 files changed, 23 insertions(+), 12 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 2ada505067cc..eb5d7a56f8d4 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -178,7 +178,7 @@ notrace static int __always_inline do_realtime(struct timespec *ts)
 
 	ts->tv_nsec = 0;
 	do {
-		seq = read_seqcount_begin_no_lockdep(&gtod->seq);
+		seq = raw_read_seqcount_begin(&gtod->seq);
 		mode = gtod->clock.vclock_mode;
 		ts->tv_sec = gtod->wall_time_sec;
 		ns = gtod->wall_time_snsec;
@@ -198,7 +198,7 @@ notrace static int do_monotonic(struct timespec *ts)
 
 	ts->tv_nsec = 0;
 	do {
-		seq = read_seqcount_begin_no_lockdep(&gtod->seq);
+		seq = raw_read_seqcount_begin(&gtod->seq);
 		mode = gtod->clock.vclock_mode;
 		ts->tv_sec = gtod->monotonic_time_sec;
 		ns = gtod->monotonic_time_snsec;
@@ -214,7 +214,7 @@ notrace static int do_realtime_coarse(struct timespec *ts)
 {
 	unsigned long seq;
 	do {
-		seq = read_seqcount_begin_no_lockdep(&gtod->seq);
+		seq = raw_read_seqcount_begin(&gtod->seq);
 		ts->tv_sec = gtod->wall_time_coarse.tv_sec;
 		ts->tv_nsec = gtod->wall_time_coarse.tv_nsec;
 	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
@@ -225,7 +225,7 @@ notrace static int do_monotonic_coarse(struct timespec *ts)
 {
 	unsigned long seq;
 	do {
-		seq = read_seqcount_begin_no_lockdep(&gtod->seq);
+		seq = raw_read_seqcount_begin(&gtod->seq);
 		ts->tv_sec = gtod->monotonic_time_coarse.tv_sec;
 		ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec;
 	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index cf87a24c0f92..535f158977b9 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -117,15 +117,15 @@ repeat:
 }
 
 /**
- * read_seqcount_begin_no_lockdep - start seq-read critical section w/o lockdep
+ * raw_read_seqcount_begin - start seq-read critical section w/o lockdep
  * @s: pointer to seqcount_t
  * Returns: count to be passed to read_seqcount_retry
  *
- * read_seqcount_begin_no_lockdep opens a read critical section of the given
+ * raw_read_seqcount_begin opens a read critical section of the given
  * seqcount, but without any lockdep checking. Validity of the critical
  * section is tested by checking read_seqcount_retry function.
  */
-static inline unsigned read_seqcount_begin_no_lockdep(const seqcount_t *s)
+static inline unsigned raw_read_seqcount_begin(const seqcount_t *s)
 {
 	unsigned ret = __read_seqcount_begin(s);
 	smp_rmb();
@@ -144,7 +144,7 @@ static inline unsigned read_seqcount_begin_no_lockdep(const seqcount_t *s)
 static inline unsigned read_seqcount_begin(const seqcount_t *s)
 {
 	seqcount_lockdep_reader_access(s);
-	return read_seqcount_begin_no_lockdep(s);
+	return raw_read_seqcount_begin(s);
 }
 
 /**
@@ -206,14 +206,26 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
 }
 
 
+
+static inline void raw_write_seqcount_begin(seqcount_t *s)
+{
+	s->sequence++;
+	smp_wmb();
+}
+
+static inline void raw_write_seqcount_end(seqcount_t *s)
+{
+	smp_wmb();
+	s->sequence++;
+}
+
 /*
  * Sequence counter only version assumes that callers are using their
  * own mutexing.
  */
 static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass)
 {
-	s->sequence++;
-	smp_wmb();
+	raw_write_seqcount_begin(s);
 	seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
 }
 
@@ -225,8 +237,7 @@ static inline void write_seqcount_begin(seqcount_t *s)
 static inline void write_seqcount_end(seqcount_t *s)
 {
 	seqcount_release(&s->dep_map, 1, _RET_IP_);
-	smp_wmb();
-	s->sequence++;
+	raw_write_seqcount_end(s);
 }
 
 /**
-- 
cgit v1.2.3


From f228c5b882602697a1adb50d61ff688b0df1eced Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Wed, 8 Jan 2014 11:15:53 +0100
Subject: perf/x86/intel: Add Intel RAPL PP1 energy counter support

This patch adds support for the Intel RAPL energy counter
PP1 (Power Plane 1).

On client processors, it usually corresponds to the
energy consumption of the builtin graphic card. That
is why the sysfs event is called energy-gpu.

New event:
 - name: power/energy-gpu/
 - code: event=0x4
 - unit: 2^-32 Joules

On processors without graphics, this should count 0.
The patch only enables this event on client processors.

Reviewed-by: Maria Dimakopoulou <maria.n.dimakopoulou@gmail.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: ak@linux.intel.com
Cc: acme@redhat.com
Cc: jolsa@redhat.com
Cc: zheng.z.yan@intel.com
Cc: bp@alien8.de
Cc: vincent.weaver@maine.edu
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1389176153-3128-3-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_rapl.c | 31 ++++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index 0e3754e450d9..5ad35ad94d0f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -27,6 +27,10 @@
  *	  event: rapl_energy_dram
  *    perf code: 0x3
  *
+ * dram counter: consumption of the builtin-gpu domain (client only)
+ *	  event: rapl_energy_gpu
+ *    perf code: 0x4
+ *
  * We manage those counters as free running (read-only). They may be
  * use simultaneously by other tools, such as turbostat.
  *
@@ -55,10 +59,13 @@
 #define INTEL_RAPL_PKG		0x2	/* pseudo-encoding */
 #define RAPL_IDX_RAM_NRG_STAT	2	/* DRAM */
 #define INTEL_RAPL_RAM		0x3	/* pseudo-encoding */
+#define RAPL_IDX_PP1_NRG_STAT	3	/* DRAM */
+#define INTEL_RAPL_PP1		0x4	/* pseudo-encoding */
 
 /* Clients have PP0, PKG */
 #define RAPL_IDX_CLN	(1<<RAPL_IDX_PP0_NRG_STAT|\
-			 1<<RAPL_IDX_PKG_NRG_STAT)
+			 1<<RAPL_IDX_PKG_NRG_STAT|\
+			 1<<RAPL_IDX_PP1_NRG_STAT)
 
 /* Servers have PP0, PKG, RAM */
 #define RAPL_IDX_SRV	(1<<RAPL_IDX_PP0_NRG_STAT|\
@@ -315,6 +322,10 @@ static int rapl_pmu_event_init(struct perf_event *event)
 		bit = RAPL_IDX_RAM_NRG_STAT;
 		msr = MSR_DRAM_ENERGY_STATUS;
 		break;
+	case INTEL_RAPL_PP1:
+		bit = RAPL_IDX_PP1_NRG_STAT;
+		msr = MSR_PP1_ENERGY_STATUS;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -367,19 +378,22 @@ static struct attribute_group rapl_pmu_attr_group = {
 };
 
 EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
-EVENT_ATTR_STR(energy-pkg  , rapl_pkg, "event=0x02");
-EVENT_ATTR_STR(energy-ram  , rapl_ram, "event=0x03");
+EVENT_ATTR_STR(energy-pkg  ,   rapl_pkg, "event=0x02");
+EVENT_ATTR_STR(energy-ram  ,   rapl_ram, "event=0x03");
+EVENT_ATTR_STR(energy-gpu  ,   rapl_gpu, "event=0x04");
 
 EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
-EVENT_ATTR_STR(energy-pkg.unit  , rapl_pkg_unit, "Joules");
-EVENT_ATTR_STR(energy-ram.unit  , rapl_ram_unit, "Joules");
+EVENT_ATTR_STR(energy-pkg.unit  ,   rapl_pkg_unit, "Joules");
+EVENT_ATTR_STR(energy-ram.unit  ,   rapl_ram_unit, "Joules");
+EVENT_ATTR_STR(energy-gpu.unit  ,   rapl_gpu_unit, "Joules");
 
 /*
  * we compute in 0.23 nJ increments regardless of MSR
  */
 EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10");
-EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890625e-10");
-EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10");
+EVENT_ATTR_STR(energy-pkg.scale,     rapl_pkg_scale, "2.3283064365386962890625e-10");
+EVENT_ATTR_STR(energy-ram.scale,     rapl_ram_scale, "2.3283064365386962890625e-10");
+EVENT_ATTR_STR(energy-gpu.scale,     rapl_gpu_scale, "2.3283064365386962890625e-10");
 
 static struct attribute *rapl_events_srv_attr[] = {
 	EVENT_PTR(rapl_cores),
@@ -399,12 +413,15 @@ static struct attribute *rapl_events_srv_attr[] = {
 static struct attribute *rapl_events_cln_attr[] = {
 	EVENT_PTR(rapl_cores),
 	EVENT_PTR(rapl_pkg),
+	EVENT_PTR(rapl_gpu),
 
 	EVENT_PTR(rapl_cores_unit),
 	EVENT_PTR(rapl_pkg_unit),
+	EVENT_PTR(rapl_gpu_unit),
 
 	EVENT_PTR(rapl_cores_scale),
 	EVENT_PTR(rapl_pkg_scale),
+	EVENT_PTR(rapl_gpu_scale),
 	NULL,
 };
 
-- 
cgit v1.2.3


From 1de7da377bd880ff23917f78924d0e908329d978 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 8 Nov 2013 12:01:59 +0100
Subject: arch: Move smp_mb__{before,after}_atomic_{inc,dec}.h into
 asm/atomic.h

Move the barriers functions that depend on the atomic implementation
into the atomic implementation.

Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Vineet Gupta <vgupta@synopsys.com> [for arch/arc bits]
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/20131213150640.786183683@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/arc/include/asm/atomic.h      | 5 +++++
 arch/arc/include/asm/barrier.h     | 5 -----
 arch/hexagon/include/asm/atomic.h  | 6 +++++-
 arch/hexagon/include/asm/barrier.h | 4 ----
 4 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 83f03ca6caf6..03e494f695d1 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -190,6 +190,11 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
 
 #endif /* !CONFIG_ARC_HAS_LLSC */
 
+#define smp_mb__before_atomic_dec()	barrier()
+#define smp_mb__after_atomic_dec()	barrier()
+#define smp_mb__before_atomic_inc()	barrier()
+#define smp_mb__after_atomic_inc()	barrier()
+
 /**
  * __atomic_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
diff --git a/arch/arc/include/asm/barrier.h b/arch/arc/include/asm/barrier.h
index f6cb7c4ffb35..c32245c3d1e9 100644
--- a/arch/arc/include/asm/barrier.h
+++ b/arch/arc/include/asm/barrier.h
@@ -30,11 +30,6 @@
 #define smp_wmb()       barrier()
 #endif
 
-#define smp_mb__before_atomic_dec()	barrier()
-#define smp_mb__after_atomic_dec()	barrier()
-#define smp_mb__before_atomic_inc()	barrier()
-#define smp_mb__after_atomic_inc()	barrier()
-
 #define smp_read_barrier_depends()      do { } while (0)
 
 #endif
diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h
index 8a64ff2337f6..7aae4cb2a29a 100644
--- a/arch/hexagon/include/asm/atomic.h
+++ b/arch/hexagon/include/asm/atomic.h
@@ -160,8 +160,12 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 #define atomic_sub_and_test(i, v) (atomic_sub_return(i, (v)) == 0)
 #define atomic_add_negative(i, v) (atomic_add_return(i, (v)) < 0)
 
-
 #define atomic_inc_return(v) (atomic_add_return(1, v))
 #define atomic_dec_return(v) (atomic_sub_return(1, v))
 
+#define smp_mb__before_atomic_dec()	barrier()
+#define smp_mb__after_atomic_dec()	barrier()
+#define smp_mb__before_atomic_inc()	barrier()
+#define smp_mb__after_atomic_inc()	barrier()
+
 #endif
diff --git a/arch/hexagon/include/asm/barrier.h b/arch/hexagon/include/asm/barrier.h
index 1041a8e70ce8..4e863daea25b 100644
--- a/arch/hexagon/include/asm/barrier.h
+++ b/arch/hexagon/include/asm/barrier.h
@@ -29,10 +29,6 @@
 #define smp_read_barrier_depends()	barrier()
 #define smp_wmb()			barrier()
 #define smp_mb()			barrier()
-#define smp_mb__before_atomic_dec()	barrier()
-#define smp_mb__after_atomic_dec()	barrier()
-#define smp_mb__before_atomic_inc()	barrier()
-#define smp_mb__after_atomic_inc()	barrier()
 
 /*  Set a value and use a memory barrier.  Used by the scheduler somewhere.  */
 #define set_mb(var, value) \
-- 
cgit v1.2.3


From 93ea02bb84354370e51de803a9405f171f3edf88 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 6 Nov 2013 14:57:36 +0100
Subject: arch: Clean up asm/barrier.h implementations using
 asm-generic/barrier.h

We're going to be adding a few new barrier primitives, and in order to
avoid endless duplication make more agressive use of
asm-generic/barrier.h.

Change the asm-generic/barrier.h such that it allows partial barrier
definitions and fills out the rest with defaults.

There are a few architectures (m32r, m68k) that could probably
do away with their barrier.h file entirely but are kept for now due to
their unconventional nop() implementation.

Suggested-by: Geert Uytterhoeven <geert@linux-m68k.org>
Reviewed-by: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Michael Ellerman <michael@ellerman.id.au>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Victor Kaplansky <VICTORK@il.ibm.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/r/20131213150640.846368594@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/alpha/include/asm/barrier.h      | 25 +++--------
 arch/arc/include/asm/Kbuild           |  1 +
 arch/avr32/include/asm/barrier.h      | 17 +++-----
 arch/blackfin/include/asm/barrier.h   | 18 +-------
 arch/cris/include/asm/Kbuild          |  1 +
 arch/cris/include/asm/barrier.h       | 25 -----------
 arch/frv/include/asm/barrier.h        |  8 +---
 arch/hexagon/include/asm/Kbuild       |  1 +
 arch/m32r/include/asm/barrier.h       | 80 +----------------------------------
 arch/m68k/include/asm/barrier.h       | 14 +-----
 arch/microblaze/include/asm/Kbuild    |  1 +
 arch/microblaze/include/asm/barrier.h | 27 ------------
 arch/mn10300/include/asm/Kbuild       |  1 +
 arch/mn10300/include/asm/barrier.h    | 37 ----------------
 arch/parisc/include/asm/Kbuild        |  1 +
 arch/parisc/include/asm/barrier.h     | 35 ---------------
 arch/score/include/asm/Kbuild         |  1 +
 arch/score/include/asm/barrier.h      | 16 -------
 arch/sh/include/asm/barrier.h         | 21 ++-------
 arch/sparc/include/asm/barrier_32.h   | 12 +-----
 arch/tile/include/asm/barrier.h       | 68 +----------------------------
 arch/unicore32/include/asm/barrier.h  | 11 +----
 arch/xtensa/include/asm/barrier.h     |  9 +---
 include/asm-generic/barrier.h         | 42 ++++++++++++------
 24 files changed, 58 insertions(+), 414 deletions(-)
 delete mode 100644 arch/cris/include/asm/barrier.h
 delete mode 100644 arch/microblaze/include/asm/barrier.h
 delete mode 100644 arch/mn10300/include/asm/barrier.h
 delete mode 100644 arch/parisc/include/asm/barrier.h
 delete mode 100644 arch/score/include/asm/barrier.h

(limited to 'arch')

diff --git a/arch/alpha/include/asm/barrier.h b/arch/alpha/include/asm/barrier.h
index ce8860a0b32d..3832bdb794fe 100644
--- a/arch/alpha/include/asm/barrier.h
+++ b/arch/alpha/include/asm/barrier.h
@@ -3,33 +3,18 @@
 
 #include <asm/compiler.h>
 
-#define mb() \
-__asm__ __volatile__("mb": : :"memory")
+#define mb()	__asm__ __volatile__("mb": : :"memory")
+#define rmb()	__asm__ __volatile__("mb": : :"memory")
+#define wmb()	__asm__ __volatile__("wmb": : :"memory")
 
-#define rmb() \
-__asm__ __volatile__("mb": : :"memory")
-
-#define wmb() \
-__asm__ __volatile__("wmb": : :"memory")
-
-#define read_barrier_depends() \
-__asm__ __volatile__("mb": : :"memory")
+#define read_barrier_depends() __asm__ __volatile__("mb": : :"memory")
 
 #ifdef CONFIG_SMP
 #define __ASM_SMP_MB	"\tmb\n"
-#define smp_mb()	mb()
-#define smp_rmb()	rmb()
-#define smp_wmb()	wmb()
-#define smp_read_barrier_depends()	read_barrier_depends()
 #else
 #define __ASM_SMP_MB
-#define smp_mb()	barrier()
-#define smp_rmb()	barrier()
-#define smp_wmb()	barrier()
-#define smp_read_barrier_depends()	do { } while (0)
 #endif
 
-#define set_mb(var, value) \
-do { var = value; mb(); } while (0)
+#include <asm-generic/barrier.h>
 
 #endif		/* __BARRIER_H */
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index 5943f7f9d325..e07c786011af 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -47,3 +47,4 @@ generic-y += user.h
 generic-y += vga.h
 generic-y += xor.h
 generic-y += preempt.h
+generic-y += barrier.h
diff --git a/arch/avr32/include/asm/barrier.h b/arch/avr32/include/asm/barrier.h
index 0961275373db..715100790fd0 100644
--- a/arch/avr32/include/asm/barrier.h
+++ b/arch/avr32/include/asm/barrier.h
@@ -8,22 +8,15 @@
 #ifndef __ASM_AVR32_BARRIER_H
 #define __ASM_AVR32_BARRIER_H
 
-#define nop()			asm volatile("nop")
-
-#define mb()			asm volatile("" : : : "memory")
-#define rmb()			mb()
-#define wmb()			asm volatile("sync 0" : : : "memory")
-#define read_barrier_depends()  do { } while(0)
-#define set_mb(var, value)      do { var = value; mb(); } while(0)
+/*
+ * Weirdest thing ever.. no full barrier, but it has a write barrier!
+ */
+#define wmb()	asm volatile("sync 0" : : : "memory")
 
 #ifdef CONFIG_SMP
 # error "The AVR32 port does not support SMP"
-#else
-# define smp_mb()		barrier()
-# define smp_rmb()		barrier()
-# define smp_wmb()		barrier()
-# define smp_read_barrier_depends() do { } while(0)
 #endif
 
+#include <asm-generic/barrier.h>
 
 #endif /* __ASM_AVR32_BARRIER_H */
diff --git a/arch/blackfin/include/asm/barrier.h b/arch/blackfin/include/asm/barrier.h
index ebb189507dd7..19283a16ac08 100644
--- a/arch/blackfin/include/asm/barrier.h
+++ b/arch/blackfin/include/asm/barrier.h
@@ -23,26 +23,10 @@
 # define rmb()	do { barrier(); smp_check_barrier(); } while (0)
 # define wmb()	do { barrier(); smp_mark_barrier(); } while (0)
 # define read_barrier_depends()	do { barrier(); smp_check_barrier(); } while (0)
-#else
-# define mb()	barrier()
-# define rmb()	barrier()
-# define wmb()	barrier()
-# define read_barrier_depends()	do { } while (0)
 #endif
 
-#else /* !CONFIG_SMP */
-
-#define mb()	barrier()
-#define rmb()	barrier()
-#define wmb()	barrier()
-#define read_barrier_depends()	do { } while (0)
-
 #endif /* !CONFIG_SMP */
 
-#define smp_mb()  mb()
-#define smp_rmb() rmb()
-#define smp_wmb() wmb()
-#define set_mb(var, value) do { var = value; mb(); } while (0)
-#define smp_read_barrier_depends()	read_barrier_depends()
+#include <asm-generic/barrier.h>
 
 #endif /* _BLACKFIN_BARRIER_H */
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index b06caf649a95..35ec2e5ca832 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -12,3 +12,4 @@ generic-y += trace_clock.h
 generic-y += vga.h
 generic-y += xor.h
 generic-y += preempt.h
+generic-y += barrier.h
diff --git a/arch/cris/include/asm/barrier.h b/arch/cris/include/asm/barrier.h
deleted file mode 100644
index 198ad7fa6b25..000000000000
--- a/arch/cris/include/asm/barrier.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef __ASM_CRIS_BARRIER_H
-#define __ASM_CRIS_BARRIER_H
-
-#define nop() __asm__ __volatile__ ("nop");
-
-#define barrier() __asm__ __volatile__("": : :"memory")
-#define mb() barrier()
-#define rmb() mb()
-#define wmb() mb()
-#define read_barrier_depends() do { } while(0)
-#define set_mb(var, value)  do { var = value; mb(); } while (0)
-
-#ifdef CONFIG_SMP
-#define smp_mb()        mb()
-#define smp_rmb()       rmb()
-#define smp_wmb()       wmb()
-#define smp_read_barrier_depends()     read_barrier_depends()
-#else
-#define smp_mb()        barrier()
-#define smp_rmb()       barrier()
-#define smp_wmb()       barrier()
-#define smp_read_barrier_depends()     do { } while(0)
-#endif
-
-#endif /* __ASM_CRIS_BARRIER_H */
diff --git a/arch/frv/include/asm/barrier.h b/arch/frv/include/asm/barrier.h
index 06776ad9f5e9..abbef470154c 100644
--- a/arch/frv/include/asm/barrier.h
+++ b/arch/frv/include/asm/barrier.h
@@ -17,13 +17,7 @@
 #define mb()			asm volatile ("membar" : : :"memory")
 #define rmb()			asm volatile ("membar" : : :"memory")
 #define wmb()			asm volatile ("membar" : : :"memory")
-#define read_barrier_depends()	do { } while (0)
 
-#define smp_mb()			barrier()
-#define smp_rmb()			barrier()
-#define smp_wmb()			barrier()
-#define smp_read_barrier_depends()	do {} while(0)
-#define set_mb(var, value) \
-	do { var = (value); barrier(); } while (0)
+#include <asm-generic/barrier.h>
 
 #endif /* _ASM_BARRIER_H */
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index 67c3450309b7..a614ec9747a6 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -54,3 +54,4 @@ generic-y += ucontext.h
 generic-y += unaligned.h
 generic-y += xor.h
 generic-y += preempt.h
+generic-y += barrier.h
diff --git a/arch/m32r/include/asm/barrier.h b/arch/m32r/include/asm/barrier.h
index 6976621efd3f..1a40265e8d88 100644
--- a/arch/m32r/include/asm/barrier.h
+++ b/arch/m32r/include/asm/barrier.h
@@ -11,84 +11,6 @@
 
 #define nop()  __asm__ __volatile__ ("nop" : : )
 
-/*
- * Memory barrier.
- *
- * mb() prevents loads and stores being reordered across this point.
- * rmb() prevents loads being reordered across this point.
- * wmb() prevents stores being reordered across this point.
- */
-#define mb()   barrier()
-#define rmb()  mb()
-#define wmb()  mb()
-
-/**
- * read_barrier_depends - Flush all pending reads that subsequents reads
- * depend on.
- *
- * No data-dependent reads from memory-like regions are ever reordered
- * over this barrier.  All reads preceding this primitive are guaranteed
- * to access memory (but not necessarily other CPUs' caches) before any
- * reads following this primitive that depend on the data return by
- * any of the preceding reads.  This primitive is much lighter weight than
- * rmb() on most CPUs, and is never heavier weight than is
- * rmb().
- *
- * These ordering constraints are respected by both the local CPU
- * and the compiler.
- *
- * Ordering is not guaranteed by anything other than these primitives,
- * not even by data dependencies.  See the documentation for
- * memory_barrier() for examples and URLs to more information.
- *
- * For example, the following code would force ordering (the initial
- * value of "a" is zero, "b" is one, and "p" is "&a"):
- *
- * <programlisting>
- *      CPU 0                           CPU 1
- *
- *      b = 2;
- *      memory_barrier();
- *      p = &b;                         q = p;
- *                                      read_barrier_depends();
- *                                      d = *q;
- * </programlisting>
- *
- *
- * because the read of "*q" depends on the read of "p" and these
- * two reads are separated by a read_barrier_depends().  However,
- * the following code, with the same initial values for "a" and "b":
- *
- * <programlisting>
- *      CPU 0                           CPU 1
- *
- *      a = 2;
- *      memory_barrier();
- *      b = 3;                          y = b;
- *                                      read_barrier_depends();
- *                                      x = a;
- * </programlisting>
- *
- * does not enforce ordering, since there is no data dependency between
- * the read of "a" and the read of "b".  Therefore, on some CPUs, such
- * as Alpha, "y" could be set to 3 and "x" to 0.  Use rmb()
- * in cases like this where there are no data dependencies.
- **/
-
-#define read_barrier_depends()	do { } while (0)
-
-#ifdef CONFIG_SMP
-#define smp_mb()	mb()
-#define smp_rmb()	rmb()
-#define smp_wmb()	wmb()
-#define smp_read_barrier_depends()	read_barrier_depends()
-#define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
-#else
-#define smp_mb()	barrier()
-#define smp_rmb()	barrier()
-#define smp_wmb()	barrier()
-#define smp_read_barrier_depends()	do { } while (0)
-#define set_mb(var, value) do { var = value; barrier(); } while (0)
-#endif
+#include <asm-generic/barrier.h>
 
 #endif /* _ASM_M32R_BARRIER_H */
diff --git a/arch/m68k/include/asm/barrier.h b/arch/m68k/include/asm/barrier.h
index 445ce22c23cb..15c5f77c1614 100644
--- a/arch/m68k/include/asm/barrier.h
+++ b/arch/m68k/include/asm/barrier.h
@@ -1,20 +1,8 @@
 #ifndef _M68K_BARRIER_H
 #define _M68K_BARRIER_H
 
-/*
- * Force strict CPU ordering.
- * Not really required on m68k...
- */
 #define nop()		do { asm volatile ("nop"); barrier(); } while (0)
-#define mb()		barrier()
-#define rmb()		barrier()
-#define wmb()		barrier()
-#define read_barrier_depends()	((void)0)
-#define set_mb(var, value)	({ (var) = (value); wmb(); })
 
-#define smp_mb()	barrier()
-#define smp_rmb()	barrier()
-#define smp_wmb()	barrier()
-#define smp_read_barrier_depends()	((void)0)
+#include <asm-generic/barrier.h>
 
 #endif /* _M68K_BARRIER_H */
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index ce0bbf8f5640..f77fb6630b11 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -4,3 +4,4 @@ generic-y += exec.h
 generic-y += trace_clock.h
 generic-y += syscalls.h
 generic-y += preempt.h
+generic-y += barrier.h
diff --git a/arch/microblaze/include/asm/barrier.h b/arch/microblaze/include/asm/barrier.h
deleted file mode 100644
index df5be3e87044..000000000000
--- a/arch/microblaze/include/asm/barrier.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (C) 2006 Atmark Techno, Inc.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#ifndef _ASM_MICROBLAZE_BARRIER_H
-#define _ASM_MICROBLAZE_BARRIER_H
-
-#define nop()                  asm volatile ("nop")
-
-#define smp_read_barrier_depends()	do {} while (0)
-#define read_barrier_depends()		do {} while (0)
-
-#define mb()			barrier()
-#define rmb()			mb()
-#define wmb()			mb()
-#define set_mb(var, value)	do { var = value; mb(); } while (0)
-#define set_wmb(var, value)	do { var = value; wmb(); } while (0)
-
-#define smp_mb()		mb()
-#define smp_rmb()		rmb()
-#define smp_wmb()		wmb()
-
-#endif /* _ASM_MICROBLAZE_BARRIER_H */
diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild
index 74742dc6a3da..367ef399ddf7 100644
--- a/arch/mn10300/include/asm/Kbuild
+++ b/arch/mn10300/include/asm/Kbuild
@@ -3,3 +3,4 @@ generic-y += clkdev.h
 generic-y += exec.h
 generic-y += trace_clock.h
 generic-y += preempt.h
+generic-y += barrier.h
diff --git a/arch/mn10300/include/asm/barrier.h b/arch/mn10300/include/asm/barrier.h
deleted file mode 100644
index 2bd97a5c8af7..000000000000
--- a/arch/mn10300/include/asm/barrier.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/* MN10300 memory barrier definitions
- *
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-#ifndef _ASM_BARRIER_H
-#define _ASM_BARRIER_H
-
-#define nop()	asm volatile ("nop")
-
-#define mb()	asm volatile ("": : :"memory")
-#define rmb()	mb()
-#define wmb()	asm volatile ("": : :"memory")
-
-#ifdef CONFIG_SMP
-#define smp_mb()	mb()
-#define smp_rmb()	rmb()
-#define smp_wmb()	wmb()
-#define set_mb(var, value)  do { xchg(&var, value); } while (0)
-#else  /* CONFIG_SMP */
-#define smp_mb()	barrier()
-#define smp_rmb()	barrier()
-#define smp_wmb()	barrier()
-#define set_mb(var, value)  do { var = value;  mb(); } while (0)
-#endif /* CONFIG_SMP */
-
-#define set_wmb(var, value) do { var = value; wmb(); } while (0)
-
-#define read_barrier_depends()		do {} while (0)
-#define smp_read_barrier_depends()	do {} while (0)
-
-#endif /* _ASM_BARRIER_H */
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index a603b9ebe54c..8df06d0074f4 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -5,3 +5,4 @@ generic-y += word-at-a-time.h auxvec.h user.h cputime.h emergency-restart.h \
 	  poll.h xor.h clkdev.h exec.h
 generic-y += trace_clock.h
 generic-y += preempt.h
+generic-y += barrier.h
diff --git a/arch/parisc/include/asm/barrier.h b/arch/parisc/include/asm/barrier.h
deleted file mode 100644
index e77d834aa803..000000000000
--- a/arch/parisc/include/asm/barrier.h
+++ /dev/null
@@ -1,35 +0,0 @@
-#ifndef __PARISC_BARRIER_H
-#define __PARISC_BARRIER_H
-
-/*
-** This is simply the barrier() macro from linux/kernel.h but when serial.c
-** uses tqueue.h uses smp_mb() defined using barrier(), linux/kernel.h
-** hasn't yet been included yet so it fails, thus repeating the macro here.
-**
-** PA-RISC architecture allows for weakly ordered memory accesses although
-** none of the processors use it. There is a strong ordered bit that is
-** set in the O-bit of the page directory entry. Operating systems that
-** can not tolerate out of order accesses should set this bit when mapping
-** pages. The O-bit of the PSW should also be set to 1 (I don't believe any
-** of the processor implemented the PSW O-bit). The PCX-W ERS states that
-** the TLB O-bit is not implemented so the page directory does not need to
-** have the O-bit set when mapping pages (section 3.1). This section also
-** states that the PSW Y, Z, G, and O bits are not implemented.
-** So it looks like nothing needs to be done for parisc-linux (yet).
-** (thanks to chada for the above comment -ggg)
-**
-** The __asm__ op below simple prevents gcc/ld from reordering
-** instructions across the mb() "call".
-*/
-#define mb()		__asm__ __volatile__("":::"memory")	/* barrier() */
-#define rmb()		mb()
-#define wmb()		mb()
-#define smp_mb()	mb()
-#define smp_rmb()	mb()
-#define smp_wmb()	mb()
-#define smp_read_barrier_depends()	do { } while(0)
-#define read_barrier_depends()		do { } while(0)
-
-#define set_mb(var, value)		do { var = value; mb(); } while (0)
-
-#endif /* __PARISC_BARRIER_H */
diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild
index f3414ade77a3..ee2993b6e5d1 100644
--- a/arch/score/include/asm/Kbuild
+++ b/arch/score/include/asm/Kbuild
@@ -5,3 +5,4 @@ generic-y += clkdev.h
 generic-y += trace_clock.h
 generic-y += xor.h
 generic-y += preempt.h
+generic-y += barrier.h
diff --git a/arch/score/include/asm/barrier.h b/arch/score/include/asm/barrier.h
deleted file mode 100644
index 0eacb6471e6d..000000000000
--- a/arch/score/include/asm/barrier.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef _ASM_SCORE_BARRIER_H
-#define _ASM_SCORE_BARRIER_H
-
-#define mb()		barrier()
-#define rmb()		barrier()
-#define wmb()		barrier()
-#define smp_mb()	barrier()
-#define smp_rmb()	barrier()
-#define smp_wmb()	barrier()
-
-#define read_barrier_depends()		do {} while (0)
-#define smp_read_barrier_depends()	do {} while (0)
-
-#define set_mb(var, value) 		do {var = value; wmb(); } while (0)
-
-#endif /* _ASM_SCORE_BARRIER_H */
diff --git a/arch/sh/include/asm/barrier.h b/arch/sh/include/asm/barrier.h
index 72c103dae300..43715308b068 100644
--- a/arch/sh/include/asm/barrier.h
+++ b/arch/sh/include/asm/barrier.h
@@ -26,29 +26,14 @@
 #if defined(CONFIG_CPU_SH4A) || defined(CONFIG_CPU_SH5)
 #define mb()		__asm__ __volatile__ ("synco": : :"memory")
 #define rmb()		mb()
-#define wmb()		__asm__ __volatile__ ("synco": : :"memory")
+#define wmb()		mb()
 #define ctrl_barrier()	__icbi(PAGE_OFFSET)
-#define read_barrier_depends()	do { } while(0)
 #else
-#define mb()		__asm__ __volatile__ ("": : :"memory")
-#define rmb()		mb()
-#define wmb()		__asm__ __volatile__ ("": : :"memory")
 #define ctrl_barrier()	__asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop")
-#define read_barrier_depends()	do { } while(0)
-#endif
-
-#ifdef CONFIG_SMP
-#define smp_mb()	mb()
-#define smp_rmb()	rmb()
-#define smp_wmb()	wmb()
-#define smp_read_barrier_depends()	read_barrier_depends()
-#else
-#define smp_mb()	barrier()
-#define smp_rmb()	barrier()
-#define smp_wmb()	barrier()
-#define smp_read_barrier_depends()	do { } while(0)
 #endif
 
 #define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
 
+#include <asm-generic/barrier.h>
+
 #endif /* __ASM_SH_BARRIER_H */
diff --git a/arch/sparc/include/asm/barrier_32.h b/arch/sparc/include/asm/barrier_32.h
index c1b76654ee76..ae69eda288f4 100644
--- a/arch/sparc/include/asm/barrier_32.h
+++ b/arch/sparc/include/asm/barrier_32.h
@@ -1,15 +1,7 @@
 #ifndef __SPARC_BARRIER_H
 #define __SPARC_BARRIER_H
 
-/* XXX Change this if we ever use a PSO mode kernel. */
-#define mb()	__asm__ __volatile__ ("" : : : "memory")
-#define rmb()	mb()
-#define wmb()	mb()
-#define read_barrier_depends()	do { } while(0)
-#define set_mb(__var, __value)  do { __var = __value; mb(); } while(0)
-#define smp_mb()	__asm__ __volatile__("":::"memory")
-#define smp_rmb()	__asm__ __volatile__("":::"memory")
-#define smp_wmb()	__asm__ __volatile__("":::"memory")
-#define smp_read_barrier_depends()	do { } while(0)
+#include <asm/processor.h> /* for nop() */
+#include <asm-generic/barrier.h>
 
 #endif /* !(__SPARC_BARRIER_H) */
diff --git a/arch/tile/include/asm/barrier.h b/arch/tile/include/asm/barrier.h
index a9a73da5865d..b5a05d050a8f 100644
--- a/arch/tile/include/asm/barrier.h
+++ b/arch/tile/include/asm/barrier.h
@@ -22,59 +22,6 @@
 #include <arch/spr_def.h>
 #include <asm/timex.h>
 
-/*
- * read_barrier_depends - Flush all pending reads that subsequents reads
- * depend on.
- *
- * No data-dependent reads from memory-like regions are ever reordered
- * over this barrier.  All reads preceding this primitive are guaranteed
- * to access memory (but not necessarily other CPUs' caches) before any
- * reads following this primitive that depend on the data return by
- * any of the preceding reads.  This primitive is much lighter weight than
- * rmb() on most CPUs, and is never heavier weight than is
- * rmb().
- *
- * These ordering constraints are respected by both the local CPU
- * and the compiler.
- *
- * Ordering is not guaranteed by anything other than these primitives,
- * not even by data dependencies.  See the documentation for
- * memory_barrier() for examples and URLs to more information.
- *
- * For example, the following code would force ordering (the initial
- * value of "a" is zero, "b" is one, and "p" is "&a"):
- *
- * <programlisting>
- *	CPU 0				CPU 1
- *
- *	b = 2;
- *	memory_barrier();
- *	p = &b;				q = p;
- *					read_barrier_depends();
- *					d = *q;
- * </programlisting>
- *
- * because the read of "*q" depends on the read of "p" and these
- * two reads are separated by a read_barrier_depends().  However,
- * the following code, with the same initial values for "a" and "b":
- *
- * <programlisting>
- *	CPU 0				CPU 1
- *
- *	a = 2;
- *	memory_barrier();
- *	b = 3;				y = b;
- *					read_barrier_depends();
- *					x = a;
- * </programlisting>
- *
- * does not enforce ordering, since there is no data dependency between
- * the read of "a" and the read of "b".  Therefore, on some CPUs, such
- * as Alpha, "y" could be set to 3 and "x" to 0.  Use rmb()
- * in cases like this where there are no data dependencies.
- */
-#define read_barrier_depends()	do { } while (0)
-
 #define __sync()	__insn_mf()
 
 #include <hv/syscall_public.h>
@@ -125,20 +72,7 @@ mb_incoherent(void)
 #define mb()		fast_mb()
 #define iob()		fast_iob()
 
-#ifdef CONFIG_SMP
-#define smp_mb()	mb()
-#define smp_rmb()	rmb()
-#define smp_wmb()	wmb()
-#define smp_read_barrier_depends()	read_barrier_depends()
-#else
-#define smp_mb()	barrier()
-#define smp_rmb()	barrier()
-#define smp_wmb()	barrier()
-#define smp_read_barrier_depends()	do { } while (0)
-#endif
-
-#define set_mb(var, value) \
-	do { var = value; mb(); } while (0)
+#include <asm-generic/barrier.h>
 
 #endif /* !__ASSEMBLY__ */
 #endif /* _ASM_TILE_BARRIER_H */
diff --git a/arch/unicore32/include/asm/barrier.h b/arch/unicore32/include/asm/barrier.h
index a6620e5336b6..83d6a520f4bd 100644
--- a/arch/unicore32/include/asm/barrier.h
+++ b/arch/unicore32/include/asm/barrier.h
@@ -14,15 +14,6 @@
 #define dsb() __asm__ __volatile__ ("" : : : "memory")
 #define dmb() __asm__ __volatile__ ("" : : : "memory")
 
-#define mb()				barrier()
-#define rmb()				barrier()
-#define wmb()				barrier()
-#define smp_mb()			barrier()
-#define smp_rmb()			barrier()
-#define smp_wmb()			barrier()
-#define read_barrier_depends()		do { } while (0)
-#define smp_read_barrier_depends()	do { } while (0)
-
-#define set_mb(var, value)		do { var = value; smp_mb(); } while (0)
+#include <asm-generic/barrier.h>
 
 #endif /* __UNICORE_BARRIER_H__ */
diff --git a/arch/xtensa/include/asm/barrier.h b/arch/xtensa/include/asm/barrier.h
index ef021677d536..e1ee6b51dfc5 100644
--- a/arch/xtensa/include/asm/barrier.h
+++ b/arch/xtensa/include/asm/barrier.h
@@ -9,21 +9,14 @@
 #ifndef _XTENSA_SYSTEM_H
 #define _XTENSA_SYSTEM_H
 
-#define smp_read_barrier_depends() do { } while(0)
-#define read_barrier_depends() do { } while(0)
-
 #define mb()  ({ __asm__ __volatile__("memw" : : : "memory"); })
 #define rmb() barrier()
 #define wmb() mb()
 
 #ifdef CONFIG_SMP
 #error smp_* not defined
-#else
-#define smp_mb()	barrier()
-#define smp_rmb()	barrier()
-#define smp_wmb()	barrier()
 #endif
 
-#define set_mb(var, value)	do { var = value; mb(); } while (0)
+#include <asm-generic/barrier.h>
 
 #endif /* _XTENSA_SYSTEM_H */
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index 639d7a4d033b..d12a90f93689 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -1,4 +1,5 @@
-/* Generic barrier definitions, based on MN10300 definitions.
+/*
+ * Generic barrier definitions, originally based on MN10300 definitions.
  *
  * It should be possible to use these on really simple architectures,
  * but it serves more as a starting point for new ports.
@@ -16,35 +17,50 @@
 
 #ifndef __ASSEMBLY__
 
-#define nop() asm volatile ("nop")
+#include <linux/compiler.h>
+
+#ifndef nop
+#define nop()	asm volatile ("nop")
+#endif
 
 /*
- * Force strict CPU ordering.
- * And yes, this is required on UP too when we're talking
- * to devices.
+ * Force strict CPU ordering. And yes, this is required on UP too when we're
+ * talking to devices.
  *
- * This implementation only contains a compiler barrier.
+ * Fall back to compiler barriers if nothing better is provided.
  */
 
-#define mb()	asm volatile ("": : :"memory")
+#ifndef mb
+#define mb()	barrier()
+#endif
+
+#ifndef rmb
 #define rmb()	mb()
-#define wmb()	asm volatile ("": : :"memory")
+#endif
+
+#ifndef wmb
+#define wmb()	mb()
+#endif
+
+#ifndef read_barrier_depends
+#define read_barrier_depends()		do { } while (0)
+#endif
 
 #ifdef CONFIG_SMP
 #define smp_mb()	mb()
 #define smp_rmb()	rmb()
 #define smp_wmb()	wmb()
+#define smp_read_barrier_depends()	read_barrier_depends()
 #else
 #define smp_mb()	barrier()
 #define smp_rmb()	barrier()
 #define smp_wmb()	barrier()
+#define smp_read_barrier_depends()	do { } while (0)
 #endif
 
-#define set_mb(var, value)  do { var = value;  mb(); } while (0)
-#define set_wmb(var, value) do { var = value; wmb(); } while (0)
-
-#define read_barrier_depends()		do {} while (0)
-#define smp_read_barrier_depends()	do {} while (0)
+#ifndef set_mb
+#define set_mb(var, value)  do { (var) = (value); mb(); } while (0)
+#endif
 
 #endif /* !__ASSEMBLY__ */
 #endif /* __ASM_GENERIC_BARRIER_H */
-- 
cgit v1.2.3


From 47933ad41a86a4a9b50bed7c9b9bd2ba242aac63 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 6 Nov 2013 14:57:36 +0100
Subject: arch: Introduce smp_load_acquire(), smp_store_release()

A number of situations currently require the heavyweight smp_mb(),
even though there is no need to order prior stores against later
loads.  Many architectures have much cheaper ways to handle these
situations, but the Linux kernel currently has no portable way
to make use of them.

This commit therefore supplies smp_load_acquire() and
smp_store_release() to remedy this situation.  The new
smp_load_acquire() primitive orders the specified load against
any subsequent reads or writes, while the new smp_store_release()
primitive orders the specifed store against any prior reads or
writes.  These primitives allow array-based circular FIFOs to be
implemented without an smp_mb(), and also allow a theoretical
hole in rcu_assign_pointer() to be closed at no additional
expense on most architectures.

In addition, the RCU experience transitioning from explicit
smp_read_barrier_depends() and smp_wmb() to rcu_dereference()
and rcu_assign_pointer(), respectively resulted in substantial
improvements in readability.  It therefore seems likely that
replacing other explicit barriers with smp_load_acquire() and
smp_store_release() will provide similar benefits.  It appears
that roughly half of the explicit barriers in core kernel code
might be so replaced.

[Changelog by PaulMck]

Reviewed-by: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Michael Ellerman <michael@ellerman.id.au>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Victor Kaplansky <VICTORK@il.ibm.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Link: http://lkml.kernel.org/r/20131213150640.908486364@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/arm/include/asm/barrier.h      | 15 +++++++++++
 arch/arm64/include/asm/barrier.h    | 50 +++++++++++++++++++++++++++++++++++++
 arch/ia64/include/asm/barrier.h     | 23 +++++++++++++++++
 arch/metag/include/asm/barrier.h    | 15 +++++++++++
 arch/mips/include/asm/barrier.h     | 15 +++++++++++
 arch/powerpc/include/asm/barrier.h  | 21 +++++++++++++++-
 arch/s390/include/asm/barrier.h     | 15 +++++++++++
 arch/sparc/include/asm/barrier_64.h | 15 +++++++++++
 arch/x86/include/asm/barrier.h      | 43 ++++++++++++++++++++++++++++++-
 include/asm-generic/barrier.h       | 15 +++++++++++
 include/linux/compiler.h            |  9 +++++++
 11 files changed, 234 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index 60f15e274e6d..2f59f7443396 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -59,6 +59,21 @@
 #define smp_wmb()	dmb(ishst)
 #endif
 
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	smp_mb();							\
+	ACCESS_ONCE(*p) = (v);						\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	smp_mb();							\
+	___p1;								\
+})
+
 #define read_barrier_depends()		do { } while(0)
 #define smp_read_barrier_depends()	do { } while(0)
 
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index d4a63338a53c..78e20ba8806b 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -35,10 +35,60 @@
 #define smp_mb()	barrier()
 #define smp_rmb()	barrier()
 #define smp_wmb()	barrier()
+
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	smp_mb();							\
+	ACCESS_ONCE(*p) = (v);						\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	smp_mb();							\
+	___p1;								\
+})
+
 #else
+
 #define smp_mb()	asm volatile("dmb ish" : : : "memory")
 #define smp_rmb()	asm volatile("dmb ishld" : : : "memory")
 #define smp_wmb()	asm volatile("dmb ishst" : : : "memory")
+
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	switch (sizeof(*p)) {						\
+	case 4:								\
+		asm volatile ("stlr %w1, %0"				\
+				: "=Q" (*p) : "r" (v) : "memory");	\
+		break;							\
+	case 8:								\
+		asm volatile ("stlr %1, %0"				\
+				: "=Q" (*p) : "r" (v) : "memory");	\
+		break;							\
+	}								\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1;						\
+	compiletime_assert_atomic_type(*p);				\
+	switch (sizeof(*p)) {						\
+	case 4:								\
+		asm volatile ("ldar %w0, %1"				\
+			: "=r" (___p1) : "Q" (*p) : "memory");		\
+		break;							\
+	case 8:								\
+		asm volatile ("ldar %0, %1"				\
+			: "=r" (___p1) : "Q" (*p) : "memory");		\
+		break;							\
+	}								\
+	___p1;								\
+})
+
 #endif
 
 #define read_barrier_depends()		do { } while(0)
diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h
index 60576e06b6fb..d0a69aa35e27 100644
--- a/arch/ia64/include/asm/barrier.h
+++ b/arch/ia64/include/asm/barrier.h
@@ -45,13 +45,36 @@
 # define smp_rmb()	rmb()
 # define smp_wmb()	wmb()
 # define smp_read_barrier_depends()	read_barrier_depends()
+
 #else
+
 # define smp_mb()	barrier()
 # define smp_rmb()	barrier()
 # define smp_wmb()	barrier()
 # define smp_read_barrier_depends()	do { } while(0)
+
 #endif
 
+/*
+ * IA64 GCC turns volatile stores into st.rel and volatile loads into ld.acq no
+ * need for asm trickery!
+ */
+
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	barrier();							\
+	ACCESS_ONCE(*p) = (v);						\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	barrier();							\
+	___p1;								\
+})
+
 /*
  * XXX check on this ---I suspect what Linus really wants here is
  * acquire vs release semantics but we can't discuss this stuff with
diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h
index c90bfc6bf648..5d6b4b407dda 100644
--- a/arch/metag/include/asm/barrier.h
+++ b/arch/metag/include/asm/barrier.h
@@ -82,4 +82,19 @@ static inline void fence(void)
 #define smp_read_barrier_depends()     do { } while (0)
 #define set_mb(var, value) do { var = value; smp_mb(); } while (0)
 
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	smp_mb();							\
+	ACCESS_ONCE(*p) = (v);						\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	smp_mb();							\
+	___p1;								\
+})
+
 #endif /* _ASM_METAG_BARRIER_H */
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index f26d8e1bf3c3..e1aa4e4c2984 100644
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -180,4 +180,19 @@
 #define nudge_writes() mb()
 #endif
 
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	smp_mb();							\
+	ACCESS_ONCE(*p) = (v);						\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	smp_mb();							\
+	___p1;								\
+})
+
 #endif /* __ASM_BARRIER_H */
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index ae782254e731..f89da808ce31 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -45,11 +45,15 @@
 #    define SMPWMB      eieio
 #endif
 
+#define __lwsync()	__asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")
+
 #define smp_mb()	mb()
-#define smp_rmb()	__asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")
+#define smp_rmb()	__lwsync()
 #define smp_wmb()	__asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")
 #define smp_read_barrier_depends()	read_barrier_depends()
 #else
+#define __lwsync()	barrier()
+
 #define smp_mb()	barrier()
 #define smp_rmb()	barrier()
 #define smp_wmb()	barrier()
@@ -65,4 +69,19 @@
 #define data_barrier(x)	\
 	asm volatile("twi 0,%0,0; isync" : : "r" (x) : "memory");
 
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	__lwsync();							\
+	ACCESS_ONCE(*p) = (v);						\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	__lwsync();							\
+	___p1;								\
+})
+
 #endif /* _ASM_POWERPC_BARRIER_H */
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index 16760eeb79b0..578680f6207a 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -32,4 +32,19 @@
 
 #define set_mb(var, value)		do { var = value; mb(); } while (0)
 
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	barrier();							\
+	ACCESS_ONCE(*p) = (v);						\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	barrier();							\
+	___p1;								\
+})
+
 #endif /* __ASM_BARRIER_H */
diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h
index 95d45986f908..b5aad964558e 100644
--- a/arch/sparc/include/asm/barrier_64.h
+++ b/arch/sparc/include/asm/barrier_64.h
@@ -53,4 +53,19 @@ do {	__asm__ __volatile__("ba,pt	%%xcc, 1f\n\t" \
 
 #define smp_read_barrier_depends()	do { } while(0)
 
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	barrier();							\
+	ACCESS_ONCE(*p) = (v);						\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	barrier();							\
+	___p1;								\
+})
+
 #endif /* !(__SPARC64_BARRIER_H) */
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index c6cd358a1eec..04a48903b2eb 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -92,12 +92,53 @@
 #endif
 #define smp_read_barrier_depends()	read_barrier_depends()
 #define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
-#else
+#else /* !SMP */
 #define smp_mb()	barrier()
 #define smp_rmb()	barrier()
 #define smp_wmb()	barrier()
 #define smp_read_barrier_depends()	do { } while (0)
 #define set_mb(var, value) do { var = value; barrier(); } while (0)
+#endif /* SMP */
+
+#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)
+
+/*
+ * For either of these options x86 doesn't have a strong TSO memory
+ * model and we should fall back to full barriers.
+ */
+
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	smp_mb();							\
+	ACCESS_ONCE(*p) = (v);						\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	smp_mb();							\
+	___p1;								\
+})
+
+#else /* regular x86 TSO memory ordering */
+
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	barrier();							\
+	ACCESS_ONCE(*p) = (v);						\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	barrier();							\
+	___p1;								\
+})
+
 #endif
 
 /*
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index d12a90f93689..6f692f8ac664 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -62,5 +62,20 @@
 #define set_mb(var, value)  do { (var) = (value); mb(); } while (0)
 #endif
 
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	smp_mb();							\
+	ACCESS_ONCE(*p) = (v);						\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	smp_mb();							\
+	___p1;								\
+})
+
 #endif /* !__ASSEMBLY__ */
 #endif /* __ASM_GENERIC_BARRIER_H */
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 92669cd182a6..fe7a686dfd8d 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -298,6 +298,11 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 # define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
 #endif
 
+/* Is this type a native word size -- useful for atomic operations */
+#ifndef __native_word
+# define __native_word(t) (sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
+#endif
+
 /* Compile time object size, -1 for unknown */
 #ifndef __compiletime_object_size
 # define __compiletime_object_size(obj) -1
@@ -337,6 +342,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 #define compiletime_assert(condition, msg) \
 	_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
 
+#define compiletime_assert_atomic_type(t)				\
+	compiletime_assert(__native_word(t),				\
+		"Need native word sized stores/loads for atomicity.")
+
 /*
  * Prevent the compiler from merging or refetching accesses.  The compiler
  * is also forbidden from reordering successive instances of ACCESS_ONCE(),
-- 
cgit v1.2.3


From 9345005f4eed805308193658d12e4e7e9c261e74 Mon Sep 17 00:00:00 2001
From: Prarit Bhargava <prarit@redhat.com>
Date: Sun, 5 Jan 2014 11:10:52 -0500
Subject: x86/irq: Fix do_IRQ() interrupt warning for cpu hotplug retriggered
 irqs

During heavy CPU-hotplug operations the following spurious kernel warnings
can trigger:

  do_IRQ: No ... irq handler for vector (irq -1)

  [ See: https://bugzilla.kernel.org/show_bug.cgi?id=64831 ]

When downing a cpu it is possible that there are unhandled irqs
left in the APIC IRR register.  The following code path shows
how the problem can occur:

 1. CPU 5 is to go down.

 2. cpu_disable() on CPU 5 executes with interrupt flag cleared
    by local_irq_save() via stop_machine().

 3. IRQ 12 asserts on CPU 5, setting IRR but not ISR because
    interrupt flag is cleared (CPU unabled to handle the irq)

 4. IRQs are migrated off of CPU 5, and the vectors' irqs are set
    to -1. 5. stop_machine() finishes cpu_disable()

 6. cpu_die() for CPU 5 executes in normal context.

 7. CPU 5 attempts to handle IRQ 12 because the IRR is set for
    IRQ 12.  The code attempts to find the vector's IRQ and cannot
    because it has been set to -1. 8. do_IRQ() warning displays
    warning about CPU 5 IRQ 12.

I added a debug printk to output which CPU & vector was
retriggered and discovered that that we are getting bogus
events.  I see a 100% correlation between this debug printk in
fixup_irqs() and the do_IRQ() warning.

This patchset resolves this by adding definitions for
VECTOR_UNDEFINED(-1) and VECTOR_RETRIGGERED(-2) and modifying
the code to use them.

Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=64831
Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Reviewed-by: Rui Wang <rui.y.wang@intel.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Seiji Aguchi <seiji.aguchi@hds.com>
Cc: Yang Zhang <yang.z.zhang@Intel.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: janet.morgan@Intel.com
Cc: tony.luck@Intel.com
Cc: ruiv.wang@gmail.com
Link: http://lkml.kernel.org/r/1388938252-16627-1-git-send-email-prarit@redhat.com
[ Cleaned up the code a bit. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/hw_irq.h  |  3 +++
 arch/x86/kernel/apic/io_apic.c | 18 +++++++++---------
 arch/x86/kernel/irq.c          | 19 +++++++++++++------
 arch/x86/kernel/irqinit.c      |  4 ++--
 4 files changed, 27 insertions(+), 17 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index cba45d99ac1a..67d69b8e2d20 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -191,6 +191,9 @@ extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void);
 #define trace_interrupt interrupt
 #endif
 
+#define VECTOR_UNDEFINED	-1
+#define VECTOR_RETRIGGERED	-2
+
 typedef int vector_irq_t[NR_VECTORS];
 DECLARE_PER_CPU(vector_irq_t, vector_irq);
 extern void setup_vector_irq(int cpu);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index e63a5bd2a78f..6df0b660753b 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1142,9 +1142,10 @@ next:
 		if (test_bit(vector, used_vectors))
 			goto next;
 
-		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
-			if (per_cpu(vector_irq, new_cpu)[vector] != -1)
+		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) {
+			if (per_cpu(vector_irq, new_cpu)[vector] > VECTOR_UNDEFINED)
 				goto next;
+		}
 		/* Found one! */
 		current_vector = vector;
 		current_offset = offset;
@@ -1183,7 +1184,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
 
 	vector = cfg->vector;
 	for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
-		per_cpu(vector_irq, cpu)[vector] = -1;
+		per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
 
 	cfg->vector = 0;
 	cpumask_clear(cfg->domain);
@@ -1191,11 +1192,10 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
 	if (likely(!cfg->move_in_progress))
 		return;
 	for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
-		for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
-								vector++) {
+		for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
 			if (per_cpu(vector_irq, cpu)[vector] != irq)
 				continue;
-			per_cpu(vector_irq, cpu)[vector] = -1;
+			per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
 			break;
 		}
 	}
@@ -1228,12 +1228,12 @@ void __setup_vector_irq(int cpu)
 	/* Mark the free vectors */
 	for (vector = 0; vector < NR_VECTORS; ++vector) {
 		irq = per_cpu(vector_irq, cpu)[vector];
-		if (irq < 0)
+		if (irq <= VECTOR_UNDEFINED)
 			continue;
 
 		cfg = irq_cfg(irq);
 		if (!cpumask_test_cpu(cpu, cfg->domain))
-			per_cpu(vector_irq, cpu)[vector] = -1;
+			per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
 	}
 	raw_spin_unlock(&vector_lock);
 }
@@ -2208,7 +2208,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 		struct irq_cfg *cfg;
 		irq = __this_cpu_read(vector_irq[vector]);
 
-		if (irq == -1)
+		if (irq <= VECTOR_UNDEFINED)
 			continue;
 
 		desc = irq_to_desc(irq);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 22d0687e7fda..884d875c1434 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -193,9 +193,13 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
 	if (!handle_irq(irq, regs)) {
 		ack_APIC_irq();
 
-		if (printk_ratelimit())
-			pr_emerg("%s: %d.%d No irq handler for vector (irq %d)\n",
-				__func__, smp_processor_id(), vector, irq);
+		if (irq != VECTOR_RETRIGGERED) {
+			pr_emerg_ratelimited("%s: %d.%d No irq handler for vector (irq %d)\n",
+					     __func__, smp_processor_id(),
+					     vector, irq);
+		} else {
+			__this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED);
+		}
 	}
 
 	irq_exit();
@@ -344,7 +348,7 @@ void fixup_irqs(void)
 	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
 		unsigned int irr;
 
-		if (__this_cpu_read(vector_irq[vector]) < 0)
+		if (__this_cpu_read(vector_irq[vector]) <= VECTOR_UNDEFINED)
 			continue;
 
 		irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
@@ -355,11 +359,14 @@ void fixup_irqs(void)
 			data = irq_desc_get_irq_data(desc);
 			chip = irq_data_get_irq_chip(data);
 			raw_spin_lock(&desc->lock);
-			if (chip->irq_retrigger)
+			if (chip->irq_retrigger) {
 				chip->irq_retrigger(data);
+				__this_cpu_write(vector_irq[vector], VECTOR_RETRIGGERED);
+			}
 			raw_spin_unlock(&desc->lock);
 		}
-		__this_cpu_write(vector_irq[vector], -1);
+		if (__this_cpu_read(vector_irq[vector]) != VECTOR_RETRIGGERED)
+			__this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED);
 	}
 }
 #endif
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index a2a1fbc594ff..7f50156542fb 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -52,7 +52,7 @@ static struct irqaction irq2 = {
 };
 
 DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
-	[0 ... NR_VECTORS - 1] = -1,
+	[0 ... NR_VECTORS - 1] = VECTOR_UNDEFINED,
 };
 
 int vector_used_by_percpu_irq(unsigned int vector)
@@ -60,7 +60,7 @@ int vector_used_by_percpu_irq(unsigned int vector)
 	int cpu;
 
 	for_each_online_cpu(cpu) {
-		if (per_cpu(vector_irq, cpu)[vector] != -1)
+		if (per_cpu(vector_irq, cpu)[vector] > VECTOR_UNDEFINED)
 			return 1;
 	}
 
-- 
cgit v1.2.3


From b25f3e1c358434bf850220e04f28eebfc45eb634 Mon Sep 17 00:00:00 2001
From: Taras Kondratiuk <taras.kondratiuk@linaro.org>
Date: Fri, 10 Jan 2014 01:27:08 +0100
Subject: ARM: 7938/1: OMAP4/highbank: Flush L2 cache before disabling

Kexec disables outer cache before jumping to reboot code, but it doesn't
flush it explicitly. Flush is done implicitly inside of l2x0_disable().
But some SoC's override default .disable handler and don't flush cache.
This may lead to a corrupted memory during Kexec reboot on these
platforms.

This patch adds cache flush inside of OMAP4 and Highbank outer_cache.disable()
handlers to make it consistent with default l2x0_disable().

Acked-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Taras Kondratiuk <taras.kondratiuk@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mach-highbank/highbank.c  | 1 +
 arch/arm/mach-omap2/omap4-common.c | 1 +
 2 files changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/mach-highbank/highbank.c b/arch/arm/mach-highbank/highbank.c
index b3d7e5634b83..ae171506cb06 100644
--- a/arch/arm/mach-highbank/highbank.c
+++ b/arch/arm/mach-highbank/highbank.c
@@ -50,6 +50,7 @@ static void __init highbank_scu_map_io(void)
 
 static void highbank_l2x0_disable(void)
 {
+	outer_flush_all();
 	/* Disable PL310 L2 Cache controller */
 	highbank_smc1(0x102, 0x0);
 }
diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c
index 57911430324e..3f44b162fcab 100644
--- a/arch/arm/mach-omap2/omap4-common.c
+++ b/arch/arm/mach-omap2/omap4-common.c
@@ -163,6 +163,7 @@ void __iomem *omap4_get_l2cache_base(void)
 
 static void omap4_l2x0_disable(void)
 {
+	outer_flush_all();
 	/* Disable PL310 L2 Cache controller */
 	omap_smc1(0x102, 0x0);
 }
-- 
cgit v1.2.3


From 4f75d8412792777a314ac5c1393a9ed43d695fd1 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Mon, 23 Dec 2013 18:05:02 +0100
Subject: x86, mce: Fix mce_start_timer semantics

So mce_start_timer() has a 'cpu' argument which is supposed to mean to
start a timer on that cpu. However, the code currently starts a timer on
the *current* cpu the function runs on and causes the sanity-check in
mce_timer_fn to fire:

	WARNING: CPU: 0 PID: 0 at arch/x86/kernel/cpu/mcheck/mce.c:1286 mce_timer_fn

because it is running on the wrong cpu.

This was triggered by Prarit Bhargava <prarit@redhat.com> by offlining
all the cpus in succession.

Then, we were fiddling with the CMCI storm settings when starting the
timer whereas there's no need for that - if there's storm happening
on this newly restarted cpu, we're going to be in normal CMCI mode
initially and then when the CMCI interrupt starts firing, we're going to
go to the polling mode with the timer real soon.

Signed-off-by: Borislav Petkov <bp@suse.de>
Tested-by: Prarit Bhargava <prarit@redhat.com>
Cc: Tony Luck <tony.luck@intel.com>
Reviewed-by: Chen, Gong <gong.chen@linux.intel.com>
Link: http://lkml.kernel.org/r/1387722156-5511-1-git-send-email-prarit@redhat.com
---
 arch/x86/kernel/cpu/mcheck/mce.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index a389c1d859ec..4d5419b249da 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1638,15 +1638,15 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 
 static void mce_start_timer(unsigned int cpu, struct timer_list *t)
 {
-	unsigned long iv = mce_adjust_timer(check_interval * HZ);
-
-	__this_cpu_write(mce_next_interval, iv);
+	unsigned long iv = check_interval * HZ;
 
 	if (mca_cfg.ignore_ce || !iv)
 		return;
 
+	per_cpu(mce_next_interval, cpu) = iv;
+
 	t->expires = round_jiffies(jiffies + iv);
-	add_timer_on(t, smp_processor_id());
+	add_timer_on(t, cpu);
 }
 
 static void __mcheck_cpu_init_timer(void)
-- 
cgit v1.2.3


From 106553aa4ead528b1bab37fb3cc47004a4451acc Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Sun, 12 Jan 2014 15:52:21 +0100
Subject: um, x86: Fix vDSO build

Commit 663b55b9b39 ("x86: Delete non-required instances of include <linux/init.h>")
broke the UML build.

  arch/x86/um/vdso/vdso.S: Assembler messages:
  arch/x86/um/vdso/vdso.S:2: Error: no such instruction: `__initdata'
  arch/x86/um/vdso/vdso.S:9: Error: no such instruction: `__finit'

UML's vDSO needs linux/init.h.

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Richard Weinberger <richard@nod.at>
Cc: user-mode-linux-devel@lists.sourceforge.net
Cc: paul.gortmaker@windriver.com
Link: http://lkml.kernel.org/r/1389538341-31383-1-git-send-email-richard@nod.at
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/um/vdso/vdso.S | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/um/vdso/vdso.S b/arch/x86/um/vdso/vdso.S
index 4b4bd4cc06ab..1cb468adacbb 100644
--- a/arch/x86/um/vdso/vdso.S
+++ b/arch/x86/um/vdso/vdso.S
@@ -1,3 +1,4 @@
+#include <linux/init.h>
 
 __INITDATA
 
-- 
cgit v1.2.3


From 10348f5976830e5d8f74e8abb04a9a057a5e8478 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 13 Jan 2014 09:49:17 +1100
Subject: powerpc: Check return value of instance-to-package OF call

On PA-Semi firmware, the instance-to-package callback doesn't seem
to be implemented. We didn't check for error, however, thus
subsequently passed the -1 value returned into stdout_node to
thins like prom_getprop etc...

Thus caused the firmware to load values around 0 (physical) internally
as node structures. It somewhat "worked" as long as we had a NULL in the
right place (address 8) at the beginning of the kernel, we didn't "see"
the bug. But commit 5c0484e25ec03243d4c2f2d4416d4a13efc77f6a
"powerpc: Endian safe trampoline" changed the kernel entry point causing
that old bug to now cause a crash early during boot.

This fixes booting on PA-Semi board by properly checking the return
value from instance-to-package.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Tested-by: Olof Johansson <olof@lixom.net>
---
---
 arch/powerpc/kernel/prom_init.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index cb64a6e1dc51..078145acf7fb 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -1986,19 +1986,23 @@ static void __init prom_init_stdout(void)
 	/* Get the full OF pathname of the stdout device */
 	memset(path, 0, 256);
 	call_prom("instance-to-path", 3, 1, prom.stdout, path, 255);
-	stdout_node = call_prom("instance-to-package", 1, 1, prom.stdout);
-	val = cpu_to_be32(stdout_node);
-	prom_setprop(prom.chosen, "/chosen", "linux,stdout-package",
-		     &val, sizeof(val));
 	prom_printf("OF stdout device is: %s\n", of_stdout_device);
 	prom_setprop(prom.chosen, "/chosen", "linux,stdout-path",
 		     path, strlen(path) + 1);
 
-	/* If it's a display, note it */
-	memset(type, 0, sizeof(type));
-	prom_getprop(stdout_node, "device_type", type, sizeof(type));
-	if (strcmp(type, "display") == 0)
-		prom_setprop(stdout_node, path, "linux,boot-display", NULL, 0);
+	/* instance-to-package fails on PA-Semi */
+	stdout_node = call_prom("instance-to-package", 1, 1, prom.stdout);
+	if (stdout_node != PROM_ERROR) {
+		val = cpu_to_be32(stdout_node);
+		prom_setprop(prom.chosen, "/chosen", "linux,stdout-package",
+			     &val, sizeof(val));
+
+		/* If it's a display, note it */
+		memset(type, 0, sizeof(type));
+		prom_getprop(stdout_node, "device_type", type, sizeof(type));
+		if (strcmp(type, "display") == 0)
+			prom_setprop(stdout_node, path, "linux,boot-display", NULL, 0);
+	}
 }
 
 static int __init prom_find_machine_type(void)
-- 
cgit v1.2.3


From a0b7b242261c8c1097925663bf44d51c1e357e58 Mon Sep 17 00:00:00 2001
From: Michael Schmitz <schmitzmic@gmail.com>
Date: Sat, 11 Jan 2014 13:58:55 +1300
Subject: m68k/irq - Use polled IRQ flag for MFP timer cascaded interrupts

Some Atari hardware has no capacity to raise interrupts (e.g.
network or USB adapter hardware attached via ROM port). The driver
interrupt routine is called from a timer interrupt (timer D) in
these cases, using chained device specific pseudo interrupts
(IRQ_MFP_TIMER1 ff.)

These interrupts will more often than not, return IRQ_NONE as
there is not always work for the device handler when called.
Too many unhandled interrupts will result in the interrupt
being disabled by the stuck interrupt watchdog.

As preferred option to flag interrupts as needing exclusion
from the watchdog mechanism, tglx added the IRQ_IS_POLLED flag
for use in such a case. Currently, two interrupts need to use
this flag. Add more users as needed.

Signed-off-by: Michael Schmitz <schmitz@debian.org>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/atari/ataints.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/m68k/atari/ataints.c b/arch/m68k/atari/ataints.c
index 20cde4e9fc77..3e73a63c066f 100644
--- a/arch/m68k/atari/ataints.c
+++ b/arch/m68k/atari/ataints.c
@@ -333,6 +333,9 @@ void __init atari_init_IRQ(void)
 	m68k_setup_irq_controller(&atari_mfptimer_chip, handle_simple_irq,
 				  IRQ_MFP_TIMER1, 8);
 
+	irq_set_status_flags(IRQ_MFP_TIMER1, IRQ_IS_POLLED);
+	irq_set_status_flags(IRQ_MFP_TIMER2, IRQ_IS_POLLED);
+
 	/* prepare timer D data for use as poll interrupt */
 	/* set Timer D data Register - needs to be > 0 */
 	st_mfp.tim_dt_d = 254;	/* < 100 Hz */
-- 
cgit v1.2.3


From d50dde5a10f305253cbc3855307f608f8a3c5f73 Mon Sep 17 00:00:00 2001
From: Dario Faggioli <raistlin@linux.it>
Date: Thu, 7 Nov 2013 14:43:36 +0100
Subject: sched: Add new scheduler syscalls to support an extended scheduling
 parameters ABI

Add the syscalls needed for supporting scheduling algorithms
with extended scheduling parameters (e.g., SCHED_DEADLINE).

In general, it makes possible to specify a periodic/sporadic task,
that executes for a given amount of runtime at each instance, and is
scheduled according to the urgency of their own timing constraints,
i.e.:

 - a (maximum/typical) instance execution time,
 - a minimum interval between consecutive instances,
 - a time constraint by which each instance must be completed.

Thus, both the data structure that holds the scheduling parameters of
the tasks and the system calls dealing with it must be extended.
Unfortunately, modifying the existing struct sched_param would break
the ABI and result in potentially serious compatibility issues with
legacy binaries.

For these reasons, this patch:

 - defines the new struct sched_attr, containing all the fields
   that are necessary for specifying a task in the computational
   model described above;

 - defines and implements the new scheduling related syscalls that
   manipulate it, i.e., sched_setattr() and sched_getattr().

Syscalls are introduced for x86 (32 and 64 bits) and ARM only, as a
proof of concept and for developing and testing purposes. Making them
available on other architectures is straightforward.

Since no "user" for these new parameters is introduced in this patch,
the implementation of the new system calls is just identical to their
already existing counterpart. Future patches that implement scheduling
policies able to exploit the new data structure must also take care of
modifying the sched_*attr() calls accordingly with their own purposes.

Signed-off-by: Dario Faggioli <raistlin@linux.it>
[ Rewrote to use sched_attr. ]
Signed-off-by: Juri Lelli <juri.lelli@gmail.com>
[ Removed sched_setscheduler2() for now. ]
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1383831828-15501-3-git-send-email-juri.lelli@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/arm/include/asm/unistd.h      |   2 +-
 arch/arm/include/uapi/asm/unistd.h |   2 +
 arch/arm/kernel/calls.S            |   2 +
 arch/x86/syscalls/syscall_32.tbl   |   2 +
 arch/x86/syscalls/syscall_64.tbl   |   2 +
 include/linux/sched.h              |  62 +++++++++
 include/linux/syscalls.h           |   6 +
 kernel/sched/core.c                | 263 ++++++++++++++++++++++++++++++++++---
 kernel/sched/sched.h               |   9 +-
 9 files changed, 326 insertions(+), 24 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 141baa3f9a72..acabef1a75df 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -15,7 +15,7 @@
 
 #include <uapi/asm/unistd.h>
 
-#define __NR_syscalls  (380)
+#define __NR_syscalls  (384)
 #define __ARM_NR_cmpxchg		(__ARM_NR_BASE+0x00fff0)
 
 #define __ARCH_WANT_STAT64
diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h
index af33b44990ed..fb5584d0cc05 100644
--- a/arch/arm/include/uapi/asm/unistd.h
+++ b/arch/arm/include/uapi/asm/unistd.h
@@ -406,6 +406,8 @@
 #define __NR_process_vm_writev		(__NR_SYSCALL_BASE+377)
 #define __NR_kcmp			(__NR_SYSCALL_BASE+378)
 #define __NR_finit_module		(__NR_SYSCALL_BASE+379)
+#define __NR_sched_setattr		(__NR_SYSCALL_BASE+380)
+#define __NR_sched_getattr		(__NR_SYSCALL_BASE+381)
 
 /*
  * This may need to be greater than __NR_last_syscall+1 in order to
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index c6ca7e376773..166e945de832 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -389,6 +389,8 @@
 		CALL(sys_process_vm_writev)
 		CALL(sys_kcmp)
 		CALL(sys_finit_module)
+/* 380 */	CALL(sys_sched_setattr)
+		CALL(sys_sched_getattr)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index aabfb8380a1c..96bc506ac6de 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -357,3 +357,5 @@
 348	i386	process_vm_writev	sys_process_vm_writev		compat_sys_process_vm_writev
 349	i386	kcmp			sys_kcmp
 350	i386	finit_module		sys_finit_module
+351	i386	sched_setattr		sys_sched_setattr
+352	i386	sched_getattr		sys_sched_getattr
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 38ae65dfd14f..a12bddc7ccea 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -320,6 +320,8 @@
 311	64	process_vm_writev	sys_process_vm_writev
 312	common	kcmp			sys_kcmp
 313	common	finit_module		sys_finit_module
+314	common	sched_setattr		sys_sched_setattr
+315	common	sched_getattr		sys_sched_getattr
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3a1e9857b393..86025b6c6387 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -56,6 +56,66 @@ struct sched_param {
 
 #include <asm/processor.h>
 
+#define SCHED_ATTR_SIZE_VER0	48	/* sizeof first published struct */
+
+/*
+ * Extended scheduling parameters data structure.
+ *
+ * This is needed because the original struct sched_param can not be
+ * altered without introducing ABI issues with legacy applications
+ * (e.g., in sched_getparam()).
+ *
+ * However, the possibility of specifying more than just a priority for
+ * the tasks may be useful for a wide variety of application fields, e.g.,
+ * multimedia, streaming, automation and control, and many others.
+ *
+ * This variant (sched_attr) is meant at describing a so-called
+ * sporadic time-constrained task. In such model a task is specified by:
+ *  - the activation period or minimum instance inter-arrival time;
+ *  - the maximum (or average, depending on the actual scheduling
+ *    discipline) computation time of all instances, a.k.a. runtime;
+ *  - the deadline (relative to the actual activation time) of each
+ *    instance.
+ * Very briefly, a periodic (sporadic) task asks for the execution of
+ * some specific computation --which is typically called an instance--
+ * (at most) every period. Moreover, each instance typically lasts no more
+ * than the runtime and must be completed by time instant t equal to
+ * the instance activation time + the deadline.
+ *
+ * This is reflected by the actual fields of the sched_attr structure:
+ *
+ *  @size		size of the structure, for fwd/bwd compat.
+ *
+ *  @sched_policy	task's scheduling policy
+ *  @sched_flags	for customizing the scheduler behaviour
+ *  @sched_nice		task's nice value      (SCHED_NORMAL/BATCH)
+ *  @sched_priority	task's static priority (SCHED_FIFO/RR)
+ *  @sched_deadline	representative of the task's deadline
+ *  @sched_runtime	representative of the task's runtime
+ *  @sched_period	representative of the task's period
+ *
+ * Given this task model, there are a multiplicity of scheduling algorithms
+ * and policies, that can be used to ensure all the tasks will make their
+ * timing constraints.
+ */
+struct sched_attr {
+	u32 size;
+
+	u32 sched_policy;
+	u64 sched_flags;
+
+	/* SCHED_NORMAL, SCHED_BATCH */
+	s32 sched_nice;
+
+	/* SCHED_FIFO, SCHED_RR */
+	u32 sched_priority;
+
+	/* SCHED_DEADLINE */
+	u64 sched_runtime;
+	u64 sched_deadline;
+	u64 sched_period;
+};
+
 struct exec_domain;
 struct futex_pi_state;
 struct robust_list_head;
@@ -1958,6 +2018,8 @@ extern int sched_setscheduler(struct task_struct *, int,
 			      const struct sched_param *);
 extern int sched_setscheduler_nocheck(struct task_struct *, int,
 				      const struct sched_param *);
+extern int sched_setattr(struct task_struct *,
+			 const struct sched_attr *);
 extern struct task_struct *idle_task(int cpu);
 /**
  * is_idle_task - is the specified task an idle task?
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 94273bbe6050..40ed9e9a77e5 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -38,6 +38,7 @@ struct rlimit;
 struct rlimit64;
 struct rusage;
 struct sched_param;
+struct sched_attr;
 struct sel_arg_struct;
 struct semaphore;
 struct sembuf;
@@ -279,9 +280,14 @@ asmlinkage long sys_sched_setscheduler(pid_t pid, int policy,
 					struct sched_param __user *param);
 asmlinkage long sys_sched_setparam(pid_t pid,
 					struct sched_param __user *param);
+asmlinkage long sys_sched_setattr(pid_t pid,
+					struct sched_attr __user *attr);
 asmlinkage long sys_sched_getscheduler(pid_t pid);
 asmlinkage long sys_sched_getparam(pid_t pid,
 					struct sched_param __user *param);
+asmlinkage long sys_sched_getattr(pid_t pid,
+					struct sched_attr __user *attr,
+					unsigned int size);
 asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len,
 					unsigned long __user *user_mask_ptr);
 asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b21a63ed5d62..8174f889076c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2817,6 +2817,7 @@ out_unlock:
 	__task_rq_unlock(rq);
 }
 #endif
+
 void set_user_nice(struct task_struct *p, long nice)
 {
 	int old_prio, delta, on_rq;
@@ -2991,22 +2992,29 @@ static struct task_struct *find_process_by_pid(pid_t pid)
 	return pid ? find_task_by_vpid(pid) : current;
 }
 
-/* Actually do priority change: must hold rq lock. */
-static void
-__setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
+/* Actually do priority change: must hold pi & rq lock. */
+static void __setscheduler(struct rq *rq, struct task_struct *p,
+			   const struct sched_attr *attr)
 {
+	int policy = attr->sched_policy;
+
 	p->policy = policy;
-	p->rt_priority = prio;
+
+	if (rt_policy(policy))
+		p->rt_priority = attr->sched_priority;
+	else
+		p->static_prio = NICE_TO_PRIO(attr->sched_nice);
+
 	p->normal_prio = normal_prio(p);
-	/* we are holding p->pi_lock already */
 	p->prio = rt_mutex_getprio(p);
+
 	if (rt_prio(p->prio))
 		p->sched_class = &rt_sched_class;
 	else
 		p->sched_class = &fair_sched_class;
+
 	set_load_weight(p);
 }
-
 /*
  * check the target process has a UID that matches the current process's
  */
@@ -3023,10 +3031,12 @@ static bool check_same_owner(struct task_struct *p)
 	return match;
 }
 
-static int __sched_setscheduler(struct task_struct *p, int policy,
-				const struct sched_param *param, bool user)
+static int __sched_setscheduler(struct task_struct *p,
+				const struct sched_attr *attr,
+				bool user)
 {
 	int retval, oldprio, oldpolicy = -1, on_rq, running;
+	int policy = attr->sched_policy;
 	unsigned long flags;
 	const struct sched_class *prev_class;
 	struct rq *rq;
@@ -3054,17 +3064,22 @@ recheck:
 	 * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL,
 	 * SCHED_BATCH and SCHED_IDLE is 0.
 	 */
-	if (param->sched_priority < 0 ||
-	    (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) ||
-	    (!p->mm && param->sched_priority > MAX_RT_PRIO-1))
+	if (attr->sched_priority < 0 ||
+	    (p->mm && attr->sched_priority > MAX_USER_RT_PRIO-1) ||
+	    (!p->mm && attr->sched_priority > MAX_RT_PRIO-1))
 		return -EINVAL;
-	if (rt_policy(policy) != (param->sched_priority != 0))
+	if (rt_policy(policy) != (attr->sched_priority != 0))
 		return -EINVAL;
 
 	/*
 	 * Allow unprivileged RT tasks to decrease priority:
 	 */
 	if (user && !capable(CAP_SYS_NICE)) {
+		if (fair_policy(policy)) {
+			if (!can_nice(p, attr->sched_nice))
+				return -EPERM;
+		}
+
 		if (rt_policy(policy)) {
 			unsigned long rlim_rtprio =
 					task_rlimit(p, RLIMIT_RTPRIO);
@@ -3074,8 +3089,8 @@ recheck:
 				return -EPERM;
 
 			/* can't increase priority */
-			if (param->sched_priority > p->rt_priority &&
-			    param->sched_priority > rlim_rtprio)
+			if (attr->sched_priority > p->rt_priority &&
+			    attr->sched_priority > rlim_rtprio)
 				return -EPERM;
 		}
 
@@ -3123,11 +3138,16 @@ recheck:
 	/*
 	 * If not changing anything there's no need to proceed further:
 	 */
-	if (unlikely(policy == p->policy && (!rt_policy(policy) ||
-			param->sched_priority == p->rt_priority))) {
+	if (unlikely(policy == p->policy)) {
+		if (fair_policy(policy) && attr->sched_nice != TASK_NICE(p))
+			goto change;
+		if (rt_policy(policy) && attr->sched_priority != p->rt_priority)
+			goto change;
+
 		task_rq_unlock(rq, p, &flags);
 		return 0;
 	}
+change:
 
 #ifdef CONFIG_RT_GROUP_SCHED
 	if (user) {
@@ -3161,7 +3181,7 @@ recheck:
 
 	oldprio = p->prio;
 	prev_class = p->sched_class;
-	__setscheduler(rq, p, policy, param->sched_priority);
+	__setscheduler(rq, p, attr);
 
 	if (running)
 		p->sched_class->set_curr_task(rq);
@@ -3189,10 +3209,20 @@ recheck:
 int sched_setscheduler(struct task_struct *p, int policy,
 		       const struct sched_param *param)
 {
-	return __sched_setscheduler(p, policy, param, true);
+	struct sched_attr attr = {
+		.sched_policy   = policy,
+		.sched_priority = param->sched_priority
+	};
+	return __sched_setscheduler(p, &attr, true);
 }
 EXPORT_SYMBOL_GPL(sched_setscheduler);
 
+int sched_setattr(struct task_struct *p, const struct sched_attr *attr)
+{
+	return __sched_setscheduler(p, attr, true);
+}
+EXPORT_SYMBOL_GPL(sched_setattr);
+
 /**
  * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace.
  * @p: the task in question.
@@ -3209,7 +3239,11 @@ EXPORT_SYMBOL_GPL(sched_setscheduler);
 int sched_setscheduler_nocheck(struct task_struct *p, int policy,
 			       const struct sched_param *param)
 {
-	return __sched_setscheduler(p, policy, param, false);
+	struct sched_attr attr = {
+		.sched_policy   = policy,
+		.sched_priority = param->sched_priority
+	};
+	return __sched_setscheduler(p, &attr, false);
 }
 
 static int
@@ -3234,6 +3268,79 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
 	return retval;
 }
 
+/*
+ * Mimics kernel/events/core.c perf_copy_attr().
+ */
+static int sched_copy_attr(struct sched_attr __user *uattr,
+			   struct sched_attr *attr)
+{
+	u32 size;
+	int ret;
+
+	if (!access_ok(VERIFY_WRITE, uattr, SCHED_ATTR_SIZE_VER0))
+		return -EFAULT;
+
+	/*
+	 * zero the full structure, so that a short copy will be nice.
+	 */
+	memset(attr, 0, sizeof(*attr));
+
+	ret = get_user(size, &uattr->size);
+	if (ret)
+		return ret;
+
+	if (size > PAGE_SIZE)	/* silly large */
+		goto err_size;
+
+	if (!size)		/* abi compat */
+		size = SCHED_ATTR_SIZE_VER0;
+
+	if (size < SCHED_ATTR_SIZE_VER0)
+		goto err_size;
+
+	/*
+	 * If we're handed a bigger struct than we know of,
+	 * ensure all the unknown bits are 0 - i.e. new
+	 * user-space does not rely on any kernel feature
+	 * extensions we dont know about yet.
+	 */
+	if (size > sizeof(*attr)) {
+		unsigned char __user *addr;
+		unsigned char __user *end;
+		unsigned char val;
+
+		addr = (void __user *)uattr + sizeof(*attr);
+		end  = (void __user *)uattr + size;
+
+		for (; addr < end; addr++) {
+			ret = get_user(val, addr);
+			if (ret)
+				return ret;
+			if (val)
+				goto err_size;
+		}
+		size = sizeof(*attr);
+	}
+
+	ret = copy_from_user(attr, uattr, size);
+	if (ret)
+		return -EFAULT;
+
+	/*
+	 * XXX: do we want to be lenient like existing syscalls; or do we want
+	 * to be strict and return an error on out-of-bounds values?
+	 */
+	attr->sched_nice = clamp(attr->sched_nice, -20, 19);
+
+out:
+	return ret;
+
+err_size:
+	put_user(sizeof(*attr), &uattr->size);
+	ret = -E2BIG;
+	goto out;
+}
+
 /**
  * sys_sched_setscheduler - set/change the scheduler policy and RT priority
  * @pid: the pid in question.
@@ -3264,6 +3371,33 @@ SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
 	return do_sched_setscheduler(pid, -1, param);
 }
 
+/**
+ * sys_sched_setattr - same as above, but with extended sched_attr
+ * @pid: the pid in question.
+ * @attr: structure containing the extended parameters.
+ */
+SYSCALL_DEFINE2(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr)
+{
+	struct sched_attr attr;
+	struct task_struct *p;
+	int retval;
+
+	if (!uattr || pid < 0)
+		return -EINVAL;
+
+	if (sched_copy_attr(uattr, &attr))
+		return -EFAULT;
+
+	rcu_read_lock();
+	retval = -ESRCH;
+	p = find_process_by_pid(pid);
+	if (p != NULL)
+		retval = sched_setattr(p, &attr);
+	rcu_read_unlock();
+
+	return retval;
+}
+
 /**
  * sys_sched_getscheduler - get the policy (scheduling class) of a thread
  * @pid: the pid in question.
@@ -3334,6 +3468,92 @@ out_unlock:
 	return retval;
 }
 
+static int sched_read_attr(struct sched_attr __user *uattr,
+			   struct sched_attr *attr,
+			   unsigned int usize)
+{
+	int ret;
+
+	if (!access_ok(VERIFY_WRITE, uattr, usize))
+		return -EFAULT;
+
+	/*
+	 * If we're handed a smaller struct than we know of,
+	 * ensure all the unknown bits are 0 - i.e. old
+	 * user-space does not get uncomplete information.
+	 */
+	if (usize < sizeof(*attr)) {
+		unsigned char *addr;
+		unsigned char *end;
+
+		addr = (void *)attr + usize;
+		end  = (void *)attr + sizeof(*attr);
+
+		for (; addr < end; addr++) {
+			if (*addr)
+				goto err_size;
+		}
+
+		attr->size = usize;
+	}
+
+	ret = copy_to_user(uattr, attr, usize);
+	if (ret)
+		return -EFAULT;
+
+out:
+	return ret;
+
+err_size:
+	ret = -E2BIG;
+	goto out;
+}
+
+/**
+ * sys_sched_getattr - same as above, but with extended "sched_param"
+ * @pid: the pid in question.
+ * @attr: structure containing the extended parameters.
+ * @size: sizeof(attr) for fwd/bwd comp.
+ */
+SYSCALL_DEFINE3(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
+		unsigned int, size)
+{
+	struct sched_attr attr = {
+		.size = sizeof(struct sched_attr),
+	};
+	struct task_struct *p;
+	int retval;
+
+	if (!uattr || pid < 0 || size > PAGE_SIZE ||
+	    size < SCHED_ATTR_SIZE_VER0)
+		return -EINVAL;
+
+	rcu_read_lock();
+	p = find_process_by_pid(pid);
+	retval = -ESRCH;
+	if (!p)
+		goto out_unlock;
+
+	retval = security_task_getscheduler(p);
+	if (retval)
+		goto out_unlock;
+
+	attr.sched_policy = p->policy;
+	if (task_has_rt_policy(p))
+		attr.sched_priority = p->rt_priority;
+	else
+		attr.sched_nice = TASK_NICE(p);
+
+	rcu_read_unlock();
+
+	retval = sched_read_attr(uattr, &attr, size);
+	return retval;
+
+out_unlock:
+	rcu_read_unlock();
+	return retval;
+}
+
 long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 {
 	cpumask_var_t cpus_allowed, new_mask;
@@ -6400,13 +6620,16 @@ EXPORT_SYMBOL(__might_sleep);
 static void normalize_task(struct rq *rq, struct task_struct *p)
 {
 	const struct sched_class *prev_class = p->sched_class;
+	struct sched_attr attr = {
+		.sched_policy = SCHED_NORMAL,
+	};
 	int old_prio = p->prio;
 	int on_rq;
 
 	on_rq = p->on_rq;
 	if (on_rq)
 		dequeue_task(rq, p, 0);
-	__setscheduler(rq, p, SCHED_NORMAL, 0);
+	__setscheduler(rq, p, &attr);
 	if (on_rq) {
 		enqueue_task(rq, p, 0);
 		resched_task(rq->curr);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b3b4a4953efc..df023db7721c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -81,11 +81,14 @@ extern void update_cpu_load_active(struct rq *this_rq);
  */
 #define RUNTIME_INF	((u64)~0ULL)
 
+static inline int fair_policy(int policy)
+{
+	return policy == SCHED_NORMAL || policy == SCHED_BATCH;
+}
+
 static inline int rt_policy(int policy)
 {
-	if (policy == SCHED_FIFO || policy == SCHED_RR)
-		return 1;
-	return 0;
+	return policy == SCHED_FIFO || policy == SCHED_RR;
 }
 
 static inline int task_has_rt_policy(struct task_struct *p)
-- 
cgit v1.2.3


From 5dd12c2152743747ca9f50ef80281e54cc416dc0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 29 Nov 2013 18:04:39 +0100
Subject: sched/clock, x86: Use mul_u64_u32_shr() for native_sched_clock()

Use mul_u64_u32_shr() so that x86_64 can use a single 64x64->128 mul.

Before:

0000000000000560 <native_sched_clock>:
 560:   44 8b 1d 00 00 00 00    mov    0x0(%rip),%r11d        # 567 <native_sched_clock+0x7>
 567:   55                      push   %rbp
 568:   48 89 e5                mov    %rsp,%rbp
 56b:   45 85 db                test   %r11d,%r11d
 56e:   75 4f                   jne    5bf <native_sched_clock+0x5f>
 570:   0f 31                   rdtsc
 572:   89 c0                   mov    %eax,%eax
 574:   48 c1 e2 20             shl    $0x20,%rdx
 578:   48 c7 c1 00 00 00 00    mov    $0x0,%rcx
 57f:   48 09 c2                or     %rax,%rdx
 582:   48 c7 c7 00 00 00 00    mov    $0x0,%rdi
 589:   65 8b 04 25 00 00 00    mov    %gs:0x0,%eax
 590:   00
 591:   48 98                   cltq
 593:   48 8b 34 c5 00 00 00    mov    0x0(,%rax,8),%rsi
 59a:   00
 59b:   48 89 d0                mov    %rdx,%rax
 59e:   81 e2 ff 03 00 00       and    $0x3ff,%edx
 5a4:   48 c1 e8 0a             shr    $0xa,%rax
 5a8:   48 0f af 14 0e          imul   (%rsi,%rcx,1),%rdx
 5ad:   48 0f af 04 0e          imul   (%rsi,%rcx,1),%rax
 5b2:   5d                      pop    %rbp
 5b3:   48 03 04 3e             add    (%rsi,%rdi,1),%rax
 5b7:   48 c1 ea 0a             shr    $0xa,%rdx
 5bb:   48 01 d0                add    %rdx,%rax
 5be:   c3                      retq

After:

0000000000000550 <native_sched_clock>:
 550:   8b 3d 00 00 00 00       mov    0x0(%rip),%edi        # 556 <native_sched_clock+0x6>
 556:   55                      push   %rbp
 557:   48 89 e5                mov    %rsp,%rbp
 55a:   48 83 e4 f0             and    $0xfffffffffffffff0,%rsp
 55e:   85 ff                   test   %edi,%edi
 560:   75 2c                   jne    58e <native_sched_clock+0x3e>
 562:   0f 31                   rdtsc
 564:   89 c0                   mov    %eax,%eax
 566:   48 c1 e2 20             shl    $0x20,%rdx
 56a:   48 09 c2                or     %rax,%rdx
 56d:   65 48 8b 04 25 00 00    mov    %gs:0x0,%rax
 574:   00 00
 576:   89 c0                   mov    %eax,%eax
 578:   48 f7 e2                mul    %rdx
 57b:   65 48 8b 0c 25 00 00    mov    %gs:0x0,%rcx
 582:   00 00
 584:   c9                      leaveq
 585:   48 0f ac d0 0a          shrd   $0xa,%rdx,%rax
 58a:   48 01 c8                add    %rcx,%rax
 58d:   c3                      retq

                        MAINLINE   POST

    sched_clock_stable: 1	   1
    (cold) sched_clock: 329841     331312
    (cold) local_clock: 301773     310296
    (warm) sched_clock: 38375      38247
    (warm) local_clock: 100371     102713
    (warm) rdtsc:       27340      27289
    sched_clock_stable: 0          0
    (cold) sched_clock: 382634     372706
    (cold) local_clock: 396890     399275
    (warm) sched_clock: 38194      38124
    (warm) local_clock: 143452     148698
    (warm) rdtsc:       27345      27365

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/n/tip-piu203ses5y1g36bnyw2n16x@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/timer.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 34baa0eb5d0c..10a78c037910 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -4,6 +4,7 @@
 #include <linux/pm.h>
 #include <linux/percpu.h>
 #include <linux/interrupt.h>
+#include <linux/math64.h>
 
 #define TICK_SIZE (tick_nsec / 1000)
 
@@ -57,10 +58,8 @@ DECLARE_PER_CPU(unsigned long long, cyc2ns_offset);
 
 static inline unsigned long long __cycles_2_ns(unsigned long long cyc)
 {
-	int cpu = smp_processor_id();
-	unsigned long long ns = per_cpu(cyc2ns_offset, cpu);
-	ns += mult_frac(cyc, per_cpu(cyc2ns, cpu),
-			(1UL << CYC2NS_SCALE_FACTOR));
+	unsigned long long ns = this_cpu_read(cyc2ns_offset);
+	ns += mul_u64_u32_shr(cyc, this_cpu_read(cyc2ns), CYC2NS_SCALE_FACTOR);
 	return ns;
 }
 
-- 
cgit v1.2.3


From 57c67da274f3fab38e08d2c9edf08b89e1d9c71d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 29 Nov 2013 15:39:25 +0100
Subject: sched/clock, x86: Move some cyc2ns() code around

There are no __cycles_2_ns() users outside of arch/x86/kernel/tsc.c,
so move it there.

There are no cycles_2_ns() users.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/n/tip-01lslnavfgo3kmbo4532zlcj@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/timer.h |  59 -----------------------
 arch/x86/kernel/tsc.c        | 112 +++++++++++++++++++++++--------------------
 2 files changed, 61 insertions(+), 110 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 10a78c037910..b4c667693a21 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -13,66 +13,7 @@ extern int recalibrate_cpu_khz(void);
 
 extern int no_timer_check;
 
-/* Accelerators for sched_clock()
- * convert from cycles(64bits) => nanoseconds (64bits)
- *  basic equation:
- *		ns = cycles / (freq / ns_per_sec)
- *		ns = cycles * (ns_per_sec / freq)
- *		ns = cycles * (10^9 / (cpu_khz * 10^3))
- *		ns = cycles * (10^6 / cpu_khz)
- *
- *	Then we use scaling math (suggested by george@mvista.com) to get:
- *		ns = cycles * (10^6 * SC / cpu_khz) / SC
- *		ns = cycles * cyc2ns_scale / SC
- *
- *	And since SC is a constant power of two, we can convert the div
- *  into a shift.
- *
- *  We can use khz divisor instead of mhz to keep a better precision, since
- *  cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
- *  (mathieu.desnoyers@polymtl.ca)
- *
- *			-johnstul@us.ibm.com "math is hard, lets go shopping!"
- *
- * In:
- *
- * ns = cycles * cyc2ns_scale / SC
- *
- * Although we may still have enough bits to store the value of ns,
- * in some cases, we may not have enough bits to store cycles * cyc2ns_scale,
- * leading to an incorrect result.
- *
- * To avoid this, we can decompose 'cycles' into quotient and remainder
- * of division by SC.  Then,
- *
- * ns = (quot * SC + rem) * cyc2ns_scale / SC
- *    = quot * cyc2ns_scale + (rem * cyc2ns_scale) / SC
- *
- *			- sqazi@google.com
- */
-
 DECLARE_PER_CPU(unsigned long, cyc2ns);
 DECLARE_PER_CPU(unsigned long long, cyc2ns_offset);
 
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-
-static inline unsigned long long __cycles_2_ns(unsigned long long cyc)
-{
-	unsigned long long ns = this_cpu_read(cyc2ns_offset);
-	ns += mul_u64_u32_shr(cyc, this_cpu_read(cyc2ns), CYC2NS_SCALE_FACTOR);
-	return ns;
-}
-
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
-	unsigned long long ns;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	ns = __cycles_2_ns(cyc);
-	local_irq_restore(flags);
-
-	return ns;
-}
-
 #endif /* _ASM_X86_TIMER_H */
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 930e5d48f560..b4a04ac1d7aa 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -38,6 +38,66 @@ static int __read_mostly tsc_unstable;
 static int __read_mostly tsc_disabled = -1;
 
 int tsc_clocksource_reliable;
+
+/* Accelerators for sched_clock()
+ * convert from cycles(64bits) => nanoseconds (64bits)
+ *  basic equation:
+ *              ns = cycles / (freq / ns_per_sec)
+ *              ns = cycles * (ns_per_sec / freq)
+ *              ns = cycles * (10^9 / (cpu_khz * 10^3))
+ *              ns = cycles * (10^6 / cpu_khz)
+ *
+ *      Then we use scaling math (suggested by george@mvista.com) to get:
+ *              ns = cycles * (10^6 * SC / cpu_khz) / SC
+ *              ns = cycles * cyc2ns_scale / SC
+ *
+ *      And since SC is a constant power of two, we can convert the div
+ *  into a shift.
+ *
+ *  We can use khz divisor instead of mhz to keep a better precision, since
+ *  cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
+ *  (mathieu.desnoyers@polymtl.ca)
+ *
+ *                      -johnstul@us.ibm.com "math is hard, lets go shopping!"
+ */
+
+DEFINE_PER_CPU(unsigned long, cyc2ns);
+DEFINE_PER_CPU(unsigned long long, cyc2ns_offset);
+
+#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
+
+static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+{
+	unsigned long long ns = this_cpu_read(cyc2ns_offset);
+	ns += mul_u64_u32_shr(cyc, this_cpu_read(cyc2ns), CYC2NS_SCALE_FACTOR);
+	return ns;
+}
+
+static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
+{
+	unsigned long long tsc_now, ns_now, *offset;
+	unsigned long flags, *scale;
+
+	local_irq_save(flags);
+	sched_clock_idle_sleep_event();
+
+	scale = &per_cpu(cyc2ns, cpu);
+	offset = &per_cpu(cyc2ns_offset, cpu);
+
+	rdtscll(tsc_now);
+	ns_now = cycles_2_ns(tsc_now);
+
+	if (cpu_khz) {
+		*scale = ((NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) +
+				cpu_khz / 2) / cpu_khz;
+		*offset = ns_now - mult_frac(tsc_now, *scale,
+					     (1UL << CYC2NS_SCALE_FACTOR));
+	}
+
+	sched_clock_idle_wakeup_event(0);
+	local_irq_restore(flags);
+}
+
 /*
  * Scheduler clock - returns current time in nanosec units.
  */
@@ -62,7 +122,7 @@ u64 native_sched_clock(void)
 	rdtscll(this_offset);
 
 	/* return the value in ns */
-	return __cycles_2_ns(this_offset);
+	return cycles_2_ns(this_offset);
 }
 
 /* We need to define a real function for sched_clock, to override the
@@ -589,56 +649,6 @@ int recalibrate_cpu_khz(void)
 EXPORT_SYMBOL(recalibrate_cpu_khz);
 
 
-/* Accelerators for sched_clock()
- * convert from cycles(64bits) => nanoseconds (64bits)
- *  basic equation:
- *              ns = cycles / (freq / ns_per_sec)
- *              ns = cycles * (ns_per_sec / freq)
- *              ns = cycles * (10^9 / (cpu_khz * 10^3))
- *              ns = cycles * (10^6 / cpu_khz)
- *
- *      Then we use scaling math (suggested by george@mvista.com) to get:
- *              ns = cycles * (10^6 * SC / cpu_khz) / SC
- *              ns = cycles * cyc2ns_scale / SC
- *
- *      And since SC is a constant power of two, we can convert the div
- *  into a shift.
- *
- *  We can use khz divisor instead of mhz to keep a better precision, since
- *  cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
- *  (mathieu.desnoyers@polymtl.ca)
- *
- *                      -johnstul@us.ibm.com "math is hard, lets go shopping!"
- */
-
-DEFINE_PER_CPU(unsigned long, cyc2ns);
-DEFINE_PER_CPU(unsigned long long, cyc2ns_offset);
-
-static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
-{
-	unsigned long long tsc_now, ns_now, *offset;
-	unsigned long flags, *scale;
-
-	local_irq_save(flags);
-	sched_clock_idle_sleep_event();
-
-	scale = &per_cpu(cyc2ns, cpu);
-	offset = &per_cpu(cyc2ns_offset, cpu);
-
-	rdtscll(tsc_now);
-	ns_now = __cycles_2_ns(tsc_now);
-
-	if (cpu_khz) {
-		*scale = ((NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) +
-				cpu_khz / 2) / cpu_khz;
-		*offset = ns_now - mult_frac(tsc_now, *scale,
-					     (1UL << CYC2NS_SCALE_FACTOR));
-	}
-
-	sched_clock_idle_wakeup_event(0);
-	local_irq_restore(flags);
-}
-
 static unsigned long long cyc2ns_suspend;
 
 void tsc_save_sched_clock_state(void)
-- 
cgit v1.2.3


From c7a730fa4624092e2d1c0cb7b750816e87c32364 Mon Sep 17 00:00:00 2001
From: Prarit Bhargava <prarit@redhat.com>
Date: Mon, 13 Jan 2014 08:40:20 -0500
Subject: x86/irq: Fix kbuild warning in smp_irq_move_cleanup_interrupt()

Fengguang Wu's 0day kernel build service reported the following build warning:

  arch/x86/kernel/apic/io_apic.c:2211
  smp_irq_move_cleanup_interrupt() warn: always true condition '(irq <= -1) => (0-u32max <= (-1))'

because irq is defined as an unsigned int instead of an int.
Fix this trivial error by redefining irq as a signed int.  The
remaining consumers of the int are okay.

Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Link: http://lkml.kernel.org/r/1389620420-7110-1-git-send-email-prarit@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/apic/io_apic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 6df0b660753b..a43f068ebec1 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2202,7 +2202,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 
 	me = smp_processor_id();
 	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
-		unsigned int irq;
+		int irq;
 		unsigned int irr;
 		struct irq_desc *desc;
 		struct irq_cfg *cfg;
-- 
cgit v1.2.3


From 20d1c86a57762f0a33a78988e3fc8818316badd4 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 29 Nov 2013 15:40:29 +0100
Subject: sched/clock, x86: Rewrite cyc2ns() to avoid the need to disable IRQs

Use a ring-buffer like multi-version object structure which allows
always having a coherent object; we use this to avoid having to
disable IRQs while reading sched_clock() and avoids a problem when
getting an NMI while changing the cyc2ns data.

                        MAINLINE   PRE        POST

    sched_clock_stable: 1          1          1
    (cold) sched_clock: 329841     331312     257223
    (cold) local_clock: 301773     310296     309889
    (warm) sched_clock: 38375      38247      25280
    (warm) local_clock: 100371     102713     85268
    (warm) rdtsc:       27340      27289      24247
    sched_clock_stable: 0          0          0
    (cold) sched_clock: 382634     372706     301224
    (cold) local_clock: 396890     399275     399870
    (warm) sched_clock: 38194      38124      25630
    (warm) local_clock: 143452     148698     129629
    (warm) rdtsc:       27345      27365      24307

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/n/tip-s567in1e5ekq2nlyhn8f987r@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/timer.h     |  23 +++-
 arch/x86/kernel/cpu/perf_event.c |  14 ++-
 arch/x86/kernel/tsc.c            | 229 +++++++++++++++++++++++++++++++++++----
 arch/x86/platform/uv/tlb_uv.c    |  66 ++++++-----
 4 files changed, 276 insertions(+), 56 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index b4c667693a21..3de54ef0aea5 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -13,7 +13,26 @@ extern int recalibrate_cpu_khz(void);
 
 extern int no_timer_check;
 
-DECLARE_PER_CPU(unsigned long, cyc2ns);
-DECLARE_PER_CPU(unsigned long long, cyc2ns_offset);
+/*
+ * We use the full linear equation: f(x) = a + b*x, in order to allow
+ * a continuous function in the face of dynamic freq changes.
+ *
+ * Continuity means that when our frequency changes our slope (b); we want to
+ * ensure that: f(t) == f'(t), which gives: a + b*t == a' + b'*t.
+ *
+ * Without an offset (a) the above would not be possible.
+ *
+ * See the comment near cycles_2_ns() for details on how we compute (b).
+ */
+struct cyc2ns_data {
+	u32 cyc2ns_mul;
+	u32 cyc2ns_shift;
+	u64 cyc2ns_offset;
+	u32 __count;
+	/* u32 hole */
+}; /* 24 bytes -- do not grow */
+
+extern struct cyc2ns_data *cyc2ns_read_begin(void);
+extern void cyc2ns_read_end(struct cyc2ns_data *);
 
 #endif /* _ASM_X86_TIMER_H */
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 8e132931614d..9f97bd03f74f 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1883,6 +1883,8 @@ static struct pmu pmu = {
 
 void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
 {
+	struct cyc2ns_data *data;
+
 	userpg->cap_user_time = 0;
 	userpg->cap_user_time_zero = 0;
 	userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc;
@@ -1891,13 +1893,17 @@ void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
 	if (!sched_clock_stable)
 		return;
 
+	data = cyc2ns_read_begin();
+
 	userpg->cap_user_time = 1;
-	userpg->time_mult = this_cpu_read(cyc2ns);
-	userpg->time_shift = CYC2NS_SCALE_FACTOR;
-	userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
+	userpg->time_mult = data->cyc2ns_mul;
+	userpg->time_shift = data->cyc2ns_shift;
+	userpg->time_offset = data->cyc2ns_offset - now;
 
 	userpg->cap_user_time_zero = 1;
-	userpg->time_zero = this_cpu_read(cyc2ns_offset);
+	userpg->time_zero = data->cyc2ns_offset;
+
+	cyc2ns_read_end(data);
 }
 
 /*
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index b4a04ac1d7aa..92b090b2b79e 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -39,7 +39,119 @@ static int __read_mostly tsc_disabled = -1;
 
 int tsc_clocksource_reliable;
 
-/* Accelerators for sched_clock()
+/*
+ * Use a ring-buffer like data structure, where a writer advances the head by
+ * writing a new data entry and a reader advances the tail when it observes a
+ * new entry.
+ *
+ * Writers are made to wait on readers until there's space to write a new
+ * entry.
+ *
+ * This means that we can always use an {offset, mul} pair to compute a ns
+ * value that is 'roughly' in the right direction, even if we're writing a new
+ * {offset, mul} pair during the clock read.
+ *
+ * The down-side is that we can no longer guarantee strict monotonicity anymore
+ * (assuming the TSC was that to begin with), because while we compute the
+ * intersection point of the two clock slopes and make sure the time is
+ * continuous at the point of switching; we can no longer guarantee a reader is
+ * strictly before or after the switch point.
+ *
+ * It does mean a reader no longer needs to disable IRQs in order to avoid
+ * CPU-Freq updates messing with his times, and similarly an NMI reader will
+ * no longer run the risk of hitting half-written state.
+ */
+
+struct cyc2ns {
+	struct cyc2ns_data data[2];	/*  0 + 2*24 = 48 */
+	struct cyc2ns_data *head;	/* 48 + 8    = 56 */
+	struct cyc2ns_data *tail;	/* 56 + 8    = 64 */
+}; /* exactly fits one cacheline */
+
+static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns);
+
+struct cyc2ns_data *cyc2ns_read_begin(void)
+{
+	struct cyc2ns_data *head;
+
+	preempt_disable();
+
+	head = this_cpu_read(cyc2ns.head);
+	/*
+	 * Ensure we observe the entry when we observe the pointer to it.
+	 * matches the wmb from cyc2ns_write_end().
+	 */
+	smp_read_barrier_depends();
+	head->__count++;
+	barrier();
+
+	return head;
+}
+
+void cyc2ns_read_end(struct cyc2ns_data *head)
+{
+	barrier();
+	/*
+	 * If we're the outer most nested read; update the tail pointer
+	 * when we're done. This notifies possible pending writers
+	 * that we've observed the head pointer and that the other
+	 * entry is now free.
+	 */
+	if (!--head->__count) {
+		/*
+		 * x86-TSO does not reorder writes with older reads;
+		 * therefore once this write becomes visible to another
+		 * cpu, we must be finished reading the cyc2ns_data.
+		 *
+		 * matches with cyc2ns_write_begin().
+		 */
+		this_cpu_write(cyc2ns.tail, head);
+	}
+	preempt_enable();
+}
+
+/*
+ * Begin writing a new @data entry for @cpu.
+ *
+ * Assumes some sort of write side lock; currently 'provided' by the assumption
+ * that cpufreq will call its notifiers sequentially.
+ */
+static struct cyc2ns_data *cyc2ns_write_begin(int cpu)
+{
+	struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu);
+	struct cyc2ns_data *data = c2n->data;
+
+	if (data == c2n->head)
+		data++;
+
+	/* XXX send an IPI to @cpu in order to guarantee a read? */
+
+	/*
+	 * When we observe the tail write from cyc2ns_read_end(),
+	 * the cpu must be done with that entry and its safe
+	 * to start writing to it.
+	 */
+	while (c2n->tail == data)
+		cpu_relax();
+
+	return data;
+}
+
+static void cyc2ns_write_end(int cpu, struct cyc2ns_data *data)
+{
+	struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu);
+
+	/*
+	 * Ensure the @data writes are visible before we publish the
+	 * entry. Matches the data-depencency in cyc2ns_read_begin().
+	 */
+	smp_wmb();
+
+	ACCESS_ONCE(c2n->head) = data;
+}
+
+/*
+ * Accelerators for sched_clock()
  * convert from cycles(64bits) => nanoseconds (64bits)
  *  basic equation:
  *              ns = cycles / (freq / ns_per_sec)
@@ -61,49 +173,106 @@ int tsc_clocksource_reliable;
  *                      -johnstul@us.ibm.com "math is hard, lets go shopping!"
  */
 
-DEFINE_PER_CPU(unsigned long, cyc2ns);
-DEFINE_PER_CPU(unsigned long long, cyc2ns_offset);
-
 #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
 
+static void cyc2ns_data_init(struct cyc2ns_data *data)
+{
+	data->cyc2ns_mul = 1U << CYC2NS_SCALE_FACTOR;
+	data->cyc2ns_shift = CYC2NS_SCALE_FACTOR;
+	data->cyc2ns_offset = 0;
+	data->__count = 0;
+}
+
+static void cyc2ns_init(int cpu)
+{
+	struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu);
+
+	cyc2ns_data_init(&c2n->data[0]);
+	cyc2ns_data_init(&c2n->data[1]);
+
+	c2n->head = c2n->data;
+	c2n->tail = c2n->data;
+}
+
 static inline unsigned long long cycles_2_ns(unsigned long long cyc)
 {
-	unsigned long long ns = this_cpu_read(cyc2ns_offset);
-	ns += mul_u64_u32_shr(cyc, this_cpu_read(cyc2ns), CYC2NS_SCALE_FACTOR);
+	struct cyc2ns_data *data, *tail;
+	unsigned long long ns;
+
+	/*
+	 * See cyc2ns_read_*() for details; replicated in order to avoid
+	 * an extra few instructions that came with the abstraction.
+	 * Notable, it allows us to only do the __count and tail update
+	 * dance when its actually needed.
+	 */
+
+	preempt_disable();
+	data = this_cpu_read(cyc2ns.head);
+	tail = this_cpu_read(cyc2ns.tail);
+
+	if (likely(data == tail)) {
+		ns = data->cyc2ns_offset;
+		ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR);
+	} else {
+		data->__count++;
+
+		barrier();
+
+		ns = data->cyc2ns_offset;
+		ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR);
+
+		barrier();
+
+		if (!--data->__count)
+			this_cpu_write(cyc2ns.tail, data);
+	}
+	preempt_enable();
+
 	return ns;
 }
 
+/* XXX surely we already have this someplace in the kernel?! */
+#define DIV_ROUND(n, d) (((n) + ((d) / 2)) / (d))
+
 static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
 {
-	unsigned long long tsc_now, ns_now, *offset;
-	unsigned long flags, *scale;
+	unsigned long long tsc_now, ns_now;
+	struct cyc2ns_data *data;
+	unsigned long flags;
 
 	local_irq_save(flags);
 	sched_clock_idle_sleep_event();
 
-	scale = &per_cpu(cyc2ns, cpu);
-	offset = &per_cpu(cyc2ns_offset, cpu);
+	if (!cpu_khz)
+		goto done;
+
+	data = cyc2ns_write_begin(cpu);
 
 	rdtscll(tsc_now);
 	ns_now = cycles_2_ns(tsc_now);
 
-	if (cpu_khz) {
-		*scale = ((NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) +
-				cpu_khz / 2) / cpu_khz;
-		*offset = ns_now - mult_frac(tsc_now, *scale,
-					     (1UL << CYC2NS_SCALE_FACTOR));
-	}
+	/*
+	 * Compute a new multiplier as per the above comment and ensure our
+	 * time function is continuous; see the comment near struct
+	 * cyc2ns_data.
+	 */
+	data->cyc2ns_mul = DIV_ROUND(NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR, cpu_khz);
+	data->cyc2ns_shift = CYC2NS_SCALE_FACTOR;
+	data->cyc2ns_offset = ns_now -
+		mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR);
+
+	cyc2ns_write_end(cpu, data);
 
+done:
 	sched_clock_idle_wakeup_event(0);
 	local_irq_restore(flags);
 }
-
 /*
  * Scheduler clock - returns current time in nanosec units.
  */
 u64 native_sched_clock(void)
 {
-	u64 this_offset;
+	u64 tsc_now;
 
 	/*
 	 * Fall back to jiffies if there's no TSC available:
@@ -119,10 +288,10 @@ u64 native_sched_clock(void)
 	}
 
 	/* read the Time Stamp Counter: */
-	rdtscll(this_offset);
+	rdtscll(tsc_now);
 
 	/* return the value in ns */
-	return cycles_2_ns(this_offset);
+	return cycles_2_ns(tsc_now);
 }
 
 /* We need to define a real function for sched_clock, to override the
@@ -678,11 +847,21 @@ void tsc_restore_sched_clock_state(void)
 
 	local_irq_save(flags);
 
-	__this_cpu_write(cyc2ns_offset, 0);
+	/*
+	 * We're comming out of suspend, there's no concurrency yet; don't
+	 * bother being nice about the RCU stuff, just write to both
+	 * data fields.
+	 */
+
+	this_cpu_write(cyc2ns.data[0].cyc2ns_offset, 0);
+	this_cpu_write(cyc2ns.data[1].cyc2ns_offset, 0);
+
 	offset = cyc2ns_suspend - sched_clock();
 
-	for_each_possible_cpu(cpu)
-		per_cpu(cyc2ns_offset, cpu) = offset;
+	for_each_possible_cpu(cpu) {
+		per_cpu(cyc2ns.data[0].cyc2ns_offset, cpu) = offset;
+		per_cpu(cyc2ns.data[1].cyc2ns_offset, cpu) = offset;
+	}
 
 	local_irq_restore(flags);
 }
@@ -1005,8 +1184,10 @@ void __init tsc_init(void)
 	 * speed as the bootup CPU. (cpufreq notifiers will fix this
 	 * up if their speed diverges)
 	 */
-	for_each_possible_cpu(cpu)
+	for_each_possible_cpu(cpu) {
+		cyc2ns_init(cpu);
 		set_cyc2ns_scale(cpu_khz, cpu);
+	}
 
 	if (tsc_disabled > 0)
 		return;
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index efe4d7220397..dfe605ac1bcd 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -433,15 +433,49 @@ static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp)
 	return;
 }
 
-static inline unsigned long cycles_2_us(unsigned long long cyc)
+/*
+ * Not to be confused with cycles_2_ns() from tsc.c; this gives a relative
+ * number, not an absolute. It converts a duration in cycles to a duration in
+ * ns.
+ */
+static inline unsigned long long cycles_2_ns(unsigned long long cyc)
 {
+	struct cyc2ns_data *data = cyc2ns_read_begin();
 	unsigned long long ns;
-	unsigned long us;
-	int cpu = smp_processor_id();
 
-	ns =  (cyc * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR;
-	us = ns / 1000;
-	return us;
+	ns = mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift);
+
+	cyc2ns_read_end(data);
+	return ns;
+}
+
+/*
+ * The reverse of the above; converts a duration in ns to a duration in cycles.
+ */ 
+static inline unsigned long long ns_2_cycles(unsigned long long ns)
+{
+	struct cyc2ns_data *data = cyc2ns_read_begin();
+	unsigned long long cyc;
+
+	cyc = (ns << data->cyc2ns_shift) / data->cyc2ns_mul;
+
+	cyc2ns_read_end(data);
+	return cyc;
+}
+
+static inline unsigned long cycles_2_us(unsigned long long cyc)
+{
+	return cycles_2_ns(cyc) / NSEC_PER_USEC;
+}
+
+static inline cycles_t sec_2_cycles(unsigned long sec)
+{
+	return ns_2_cycles(sec * NSEC_PER_SEC);
+}
+
+static inline unsigned long long usec_2_cycles(unsigned long usec)
+{
+	return ns_2_cycles(usec * NSEC_PER_USEC);
 }
 
 /*
@@ -668,16 +702,6 @@ static int wait_completion(struct bau_desc *bau_desc,
 								bcp, try);
 }
 
-static inline cycles_t sec_2_cycles(unsigned long sec)
-{
-	unsigned long ns;
-	cycles_t cyc;
-
-	ns = sec * 1000000000;
-	cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id()));
-	return cyc;
-}
-
 /*
  * Our retries are blocked by all destination sw ack resources being
  * in use, and a timeout is pending. In that case hardware immediately
@@ -1327,16 +1351,6 @@ static void ptc_seq_stop(struct seq_file *file, void *data)
 {
 }
 
-static inline unsigned long long usec_2_cycles(unsigned long microsec)
-{
-	unsigned long ns;
-	unsigned long long cyc;
-
-	ns = microsec * 1000;
-	cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id()));
-	return cyc;
-}
-
 /*
  * Display the statistics thru /proc/sgi_uv/ptc_statistics
  * 'data' points to the cpu number
-- 
cgit v1.2.3


From 35af99e646c7f7ea46dc2977601e9e71a51dadd5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 28 Nov 2013 19:38:42 +0100
Subject: sched/clock, x86: Use a static_key for sched_clock_stable

In order to avoid the runtime condition and variable load turn
sched_clock_stable into a static_key.

Also provide a shorter implementation of local_clock() and
cpu_clock(int) when sched_clock_stable==1.

                        MAINLINE   PRE       POST

    sched_clock_stable: 1          1         1
    (cold) sched_clock: 329841     221876    215295
    (cold) local_clock: 301773     234692    220773
    (warm) sched_clock: 38375      25602     25659
    (warm) local_clock: 100371     33265     27242
    (warm) rdtsc:       27340      24214     24208
    sched_clock_stable: 0          0         0
    (cold) sched_clock: 382634     235941    237019
    (cold) local_clock: 396890     297017    294819
    (warm) sched_clock: 38194      25233     25609
    (warm) local_clock: 143452     71234     71232
    (warm) rdtsc:       27345      24245     24243

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/n/tip-eummbdechzz37mwmpags1gjr@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/amd.c        |  2 +-
 arch/x86/kernel/cpu/intel.c      |  2 +-
 arch/x86/kernel/cpu/perf_event.c |  2 +-
 arch/x86/kernel/tsc.c            |  6 +++---
 include/linux/sched.h            |  4 +++-
 kernel/sched/clock.c             | 41 +++++++++++++++++++++++++++++++++-------
 kernel/sched/debug.c             |  2 +-
 kernel/time/tick-sched.c         |  2 +-
 kernel/trace/ring_buffer.c       |  2 +-
 9 files changed, 46 insertions(+), 17 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index bca023bdd6b2..8bc79cddd9a2 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -487,7 +487,7 @@ static void early_init_amd(struct cpuinfo_x86 *c)
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
 		if (!check_tsc_unstable())
-			sched_clock_stable = 1;
+			set_sched_clock_stable();
 	}
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index ea04b342c026..1a439c047ff3 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -93,7 +93,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
 		if (!check_tsc_unstable())
-			sched_clock_stable = 1;
+			set_sched_clock_stable();
 	}
 
 	/* Penwell and Cloverview have the TSC which doesn't sleep on S3 */
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 9f97bd03f74f..b88645191fe5 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1890,7 +1890,7 @@ void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
 	userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc;
 	userpg->pmc_width = x86_pmu.cntval_bits;
 
-	if (!sched_clock_stable)
+	if (!sched_clock_stable())
 		return;
 
 	data = cyc2ns_read_begin();
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 92b090b2b79e..53c123537245 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -822,7 +822,7 @@ static unsigned long long cyc2ns_suspend;
 
 void tsc_save_sched_clock_state(void)
 {
-	if (!sched_clock_stable)
+	if (!sched_clock_stable())
 		return;
 
 	cyc2ns_suspend = sched_clock();
@@ -842,7 +842,7 @@ void tsc_restore_sched_clock_state(void)
 	unsigned long flags;
 	int cpu;
 
-	if (!sched_clock_stable)
+	if (!sched_clock_stable())
 		return;
 
 	local_irq_save(flags);
@@ -984,7 +984,7 @@ void mark_tsc_unstable(char *reason)
 {
 	if (!tsc_unstable) {
 		tsc_unstable = 1;
-		sched_clock_stable = 0;
+		clear_sched_clock_stable();
 		disable_sched_clock_irqtime();
 		pr_info("Marking TSC unstable due to %s\n", reason);
 		/* Change only the rating, when not registered */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a196cb7fc6f2..a03875221663 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1994,7 +1994,9 @@ static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
  * but then during bootup it turns out that sched_clock()
  * is reliable after all:
  */
-extern int sched_clock_stable;
+extern int sched_clock_stable(void);
+extern void set_sched_clock_stable(void);
+extern void clear_sched_clock_stable(void);
 
 extern void sched_clock_tick(void);
 extern void sched_clock_idle_sleep_event(void);
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index 59371549ddf0..c9b34c4e3ecc 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -58,6 +58,7 @@
 #include <linux/percpu.h>
 #include <linux/ktime.h>
 #include <linux/sched.h>
+#include <linux/static_key.h>
 
 /*
  * Scheduler clock - returns current time in nanosec units.
@@ -74,7 +75,27 @@ EXPORT_SYMBOL_GPL(sched_clock);
 __read_mostly int sched_clock_running;
 
 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
-__read_mostly int sched_clock_stable;
+static struct static_key __sched_clock_stable = STATIC_KEY_INIT;
+
+int sched_clock_stable(void)
+{
+	if (static_key_false(&__sched_clock_stable))
+		return false;
+	return true;
+}
+
+void set_sched_clock_stable(void)
+{
+	if (!sched_clock_stable())
+		static_key_slow_dec(&__sched_clock_stable);
+}
+
+void clear_sched_clock_stable(void)
+{
+	/* XXX worry about clock continuity */
+	if (sched_clock_stable())
+		static_key_slow_inc(&__sched_clock_stable);
+}
 
 struct sched_clock_data {
 	u64			tick_raw;
@@ -234,7 +255,7 @@ u64 sched_clock_cpu(int cpu)
 	struct sched_clock_data *scd;
 	u64 clock;
 
-	if (sched_clock_stable)
+	if (sched_clock_stable())
 		return sched_clock();
 
 	if (unlikely(!sched_clock_running))
@@ -257,7 +278,7 @@ void sched_clock_tick(void)
 	struct sched_clock_data *scd;
 	u64 now, now_gtod;
 
-	if (sched_clock_stable)
+	if (sched_clock_stable())
 		return;
 
 	if (unlikely(!sched_clock_running))
@@ -308,7 +329,10 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
  */
 u64 cpu_clock(int cpu)
 {
-	return sched_clock_cpu(cpu);
+	if (static_key_false(&__sched_clock_stable))
+		return sched_clock_cpu(cpu);
+
+	return sched_clock();
 }
 
 /*
@@ -320,7 +344,10 @@ u64 cpu_clock(int cpu)
  */
 u64 local_clock(void)
 {
-	return sched_clock_cpu(raw_smp_processor_id());
+	if (static_key_false(&__sched_clock_stable))
+		return sched_clock_cpu(raw_smp_processor_id());
+
+	return sched_clock();
 }
 
 #else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
@@ -340,12 +367,12 @@ u64 sched_clock_cpu(int cpu)
 
 u64 cpu_clock(int cpu)
 {
-	return sched_clock_cpu(cpu);
+	return sched_clock();
 }
 
 u64 local_clock(void)
 {
-	return sched_clock_cpu(0);
+	return sched_clock();
 }
 
 #endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 374fe04a5e6e..dd52e7ffb10e 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -371,7 +371,7 @@ static void sched_debug_header(struct seq_file *m)
 	PN(cpu_clk);
 	P(jiffies);
 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
-	P(sched_clock_stable);
+	P(sched_clock_stable());
 #endif
 #undef PN
 #undef P
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index ea20f7d1ac2c..c833249ab0fb 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -177,7 +177,7 @@ static bool can_stop_full_tick(void)
 	 * TODO: kick full dynticks CPUs when
 	 * sched_clock_stable is set.
 	 */
-	if (!sched_clock_stable) {
+	if (!sched_clock_stable()) {
 		trace_tick_stop(0, "unstable sched clock\n");
 		/*
 		 * Don't allow the user to think they can get
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index cc2f66f68dc5..294b8a271a04 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2558,7 +2558,7 @@ rb_reserve_next_event(struct ring_buffer *buffer,
 		if (unlikely(test_time_stamp(delta))) {
 			int local_clock_stable = 1;
 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
-			local_clock_stable = sched_clock_stable;
+			local_clock_stable = sched_clock_stable();
 #endif
 			WARN_ONCE(delta > (1ULL << 59),
 				  KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s",
-- 
cgit v1.2.3


From 10b033d434c25a6c9e0f4f4dc2418af1b8236c63 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 28 Nov 2013 19:01:40 +0100
Subject: sched/clock, x86: Avoid a runtime condition in native_sched_clock()

Use a static_key to avoid touching tsc_disabled and a runtime
condition in native_sched_clock() -- less cachelines touched is always
better.

                        MAINLINE   PRE       POST

    sched_clock_stable: 1          1         1
    (cold) sched_clock: 329841     215295    213039
    (cold) local_clock: 301773     220773    216084
    (warm) sched_clock: 38375      25659     25231
    (warm) local_clock: 100371     27242     27601
    (warm) rdtsc:       27340      24208     24203
    sched_clock_stable: 0          0         0
    (cold) sched_clock: 382634     237019    240055
    (cold) local_clock: 396890     294819    299942
    (warm) sched_clock: 38194      25609     25276
    (warm) local_clock: 143452     71232     73232
    (warm) rdtsc:       27345      24243     24244

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/n/tip-hrz87bo37qke25bty6pnfy4b@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/tsc.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 53c123537245..6377fb28b958 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -11,6 +11,7 @@
 #include <linux/clocksource.h>
 #include <linux/percpu.h>
 #include <linux/timex.h>
+#include <linux/static_key.h>
 
 #include <asm/hpet.h>
 #include <asm/timer.h>
@@ -37,6 +38,8 @@ static int __read_mostly tsc_unstable;
    erroneous rdtsc usage on !cpu_has_tsc processors */
 static int __read_mostly tsc_disabled = -1;
 
+static struct static_key __use_tsc = STATIC_KEY_INIT;
+
 int tsc_clocksource_reliable;
 
 /*
@@ -282,7 +285,7 @@ u64 native_sched_clock(void)
 	 *   very important for it to be as fast as the platform
 	 *   can achieve it. )
 	 */
-	if (unlikely(tsc_disabled)) {
+	if (!static_key_false(&__use_tsc)) {
 		/* No locking but a rare wrong value is not a big deal: */
 		return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
 	}
@@ -1193,7 +1196,9 @@ void __init tsc_init(void)
 		return;
 
 	/* now allow native_sched_clock() to use rdtsc */
+
 	tsc_disabled = 0;
+	static_key_slow_inc(&__use_tsc);
 
 	if (!no_sched_irq_time)
 		enable_sched_clock_irqtime();
-- 
cgit v1.2.3


From 075dfd82102d2048e43e1cbf48d558d915c50072 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 13 Jan 2014 13:35:16 +0100
Subject: s390/compat: fix PSW32_USER_BITS definition

PSW32_USER_BITS should define the primary address space for user space
instead of the home address space.
Symptom of this bug is that gdb doesn't work in compat mode.

The bug was introduced with e258d719ff28 "s390/uaccess: always run the kernel
in home space" and f26946d7ecad "s390/compat: make psw32_user_bits a constant
value again".

Cc: stable@vger.kernel.org # v3.13+
Reported-by: Andreas Arnez <arnez@linux.vnet.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/compat.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index 4bf9da03591e..5d7e8cf83bd6 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -38,7 +38,8 @@
 
 #define PSW32_USER_BITS (PSW32_MASK_DAT | PSW32_MASK_IO | PSW32_MASK_EXT | \
 			 PSW32_DEFAULT_KEY | PSW32_MASK_BASE | \
-			 PSW32_MASK_MCHECK | PSW32_MASK_PSTATE | PSW32_ASC_HOME)
+			 PSW32_MASK_MCHECK | PSW32_MASK_PSTATE | \
+			 PSW32_ASC_PRIMARY)
 
 #define COMPAT_USER_HZ		100
 #define COMPAT_UTS_MACHINE	"s390\0\0\0\0"
-- 
cgit v1.2.3


From 28aa39b853fc889e672b73f69ce591d15e6306b0 Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Mon, 13 Jan 2014 16:02:28 +0100
Subject: s390: delete new instances of __cpuinit usage

The patch "s390/perf: add support for the CPU-Measurement Sampling
Facility" added a new instance of the __cpuinit macro usage.

We removed this a couple versions ago; we now want to remove
the compat no-op stubs.  Introducing new users is not what
we want to see at this point in time, as it will break once
the stubs are gone.

Cc: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/perf_cpum_sf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 3c3bc8d7b220..6c0d29827cb6 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1512,8 +1512,8 @@ static void cpumf_measurement_alert(struct ext_code ext_code,
 	}
 }
 
-static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self,
-					unsigned long action, void *hcpu)
+static int cpumf_pmu_notifier(struct notifier_block *self,
+			      unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (long) hcpu;
 	int flags;
-- 
cgit v1.2.3


From 8cb75e0c4ec9786b81439761eac1d18d4a931af3 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 20 Nov 2013 12:22:37 +0100
Subject: sched/preempt: Fix up missed PREEMPT_NEED_RESCHED folding

With various drivers wanting to inject idle time; we get people
calling idle routines outside of the idle loop proper.

Therefore we need to be extra careful about not missing
TIF_NEED_RESCHED -> PREEMPT_NEED_RESCHED propagations.

While looking at this, I also realized there's a small window in the
existing idle loop where we can miss TIF_NEED_RESCHED; when it hits
right after the tif_need_resched() test at the end of the loop but
right before the need_resched() test at the start of the loop.

So move preempt_fold_need_resched() out of the loop where we're
guaranteed to have TIF_NEED_RESCHED set.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/n/tip-x9jgh45oeayzajz2mjt0y7d6@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/mwait.h |  2 +-
 include/linux/preempt.h      | 15 +++++++++++++++
 include/linux/sched.h        | 15 +++++++++++++++
 kernel/cpu/idle.c            | 17 ++++++++++-------
 kernel/sched/core.c          |  3 +--
 5 files changed, 42 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index 19b71c439256..1da25a5f96f9 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -53,7 +53,7 @@ static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
 		if (!need_resched())
 			__mwait(eax, ecx);
 	}
-	__current_clr_polling();
+	current_clr_polling();
 }
 
 #endif /* _ASM_X86_MWAIT_H */
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index dd9ddf8af205..59749fc48328 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -134,6 +134,21 @@ do { \
 #undef preempt_check_resched
 #endif
 
+#ifdef CONFIG_PREEMPT
+#define preempt_set_need_resched() \
+do { \
+	set_preempt_need_resched(); \
+} while (0)
+#define preempt_fold_need_resched() \
+do { \
+	if (tif_need_resched()) \
+		set_preempt_need_resched(); \
+} while (0)
+#else
+#define preempt_set_need_resched() do { } while (0)
+#define preempt_fold_need_resched() do { } while (0)
+#endif
+
 #ifdef CONFIG_PREEMPT_NOTIFIERS
 
 struct preempt_notifier;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a03875221663..ffccdad050b5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2745,6 +2745,21 @@ static inline bool __must_check current_clr_polling_and_test(void)
 }
 #endif
 
+static inline void current_clr_polling(void)
+{
+	__current_clr_polling();
+
+	/*
+	 * Ensure we check TIF_NEED_RESCHED after we clear the polling bit.
+	 * Once the bit is cleared, we'll get IPIs with every new
+	 * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also
+	 * fold.
+	 */
+	smp_mb(); /* paired with resched_task() */
+
+	preempt_fold_need_resched();
+}
+
 static __always_inline bool need_resched(void)
 {
 	return unlikely(tif_need_resched());
diff --git a/kernel/cpu/idle.c b/kernel/cpu/idle.c
index 988573a9a387..277f494c2a9a 100644
--- a/kernel/cpu/idle.c
+++ b/kernel/cpu/idle.c
@@ -105,14 +105,17 @@ static void cpu_idle_loop(void)
 				__current_set_polling();
 			}
 			arch_cpu_idle_exit();
-			/*
-			 * We need to test and propagate the TIF_NEED_RESCHED
-			 * bit here because we might not have send the
-			 * reschedule IPI to idle tasks.
-			 */
-			if (tif_need_resched())
-				set_preempt_need_resched();
 		}
+
+		/*
+		 * Since we fell out of the loop above, we know
+		 * TIF_NEED_RESCHED must be set, propagate it into
+		 * PREEMPT_NEED_RESCHED.
+		 *
+		 * This is required because for polling idle loops we will
+		 * not have had an IPI to fold the state for us.
+		 */
+		preempt_set_need_resched();
 		tick_nohz_idle_exit();
 		schedule_preempt_disabled();
 	}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 392c6f87906e..0326c06953eb 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1510,8 +1510,7 @@ void scheduler_ipi(void)
 	 * TIF_NEED_RESCHED remotely (for the first time) will also send
 	 * this IPI.
 	 */
-	if (tif_need_resched())
-		set_preempt_need_resched();
+	preempt_fold_need_resched();
 
 	if (llist_empty(&this_rq()->wake_list)
 			&& !tick_nohz_full_cpu(smp_processor_id())
-- 
cgit v1.2.3


From 5aa3d718f259007121b9366d36315fb8a2983d3d Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Wed, 4 Dec 2013 20:50:42 +0100
Subject: x86, ramdisk: Export relocated ramdisk VA

The ramdisk can possibly get relocated if the whole image is not mapped.
And since we're going over it in the microcode loader and fishing out
the relevant microcode patches, we want access it at its new location.
Thus, export it.

Signed-off-by: Borislav Petkov <bp@suse.de>
Tested-by: Aravind Gopalakrishnan <Aravind.Gopalakrishnan@amd.com>
---
 arch/x86/include/asm/setup.h |  3 ++-
 arch/x86/kernel/setup.c      | 19 ++++++++++---------
 2 files changed, 12 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 59bcf4e22418..d62c9f809bc5 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -3,7 +3,6 @@
 
 #include <uapi/asm/setup.h>
 
-
 #define COMMAND_LINE_SIZE 2048
 
 #include <linux/linkage.h>
@@ -29,6 +28,8 @@
 #include <asm/bootparam.h>
 #include <asm/x86_init.h>
 
+extern u64 relocated_ramdisk;
+
 /* Interrupt control for vSMPowered x86_64 systems */
 #ifdef CONFIG_X86_64
 void vsmp_init(void);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index cb233bc9dee3..baefc6dc553c 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -295,6 +295,8 @@ static void __init reserve_brk(void)
 	_brk_start = 0;
 }
 
+u64 relocated_ramdisk;
+
 #ifdef CONFIG_BLK_DEV_INITRD
 
 static u64 __init get_ramdisk_image(void)
@@ -321,25 +323,24 @@ static void __init relocate_initrd(void)
 	u64 ramdisk_image = get_ramdisk_image();
 	u64 ramdisk_size  = get_ramdisk_size();
 	u64 area_size     = PAGE_ALIGN(ramdisk_size);
-	u64 ramdisk_here;
 	unsigned long slop, clen, mapaddr;
 	char *p, *q;
 
 	/* We need to move the initrd down into directly mapped mem */
-	ramdisk_here = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
-						 area_size, PAGE_SIZE);
+	relocated_ramdisk = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
+						   area_size, PAGE_SIZE);
 
-	if (!ramdisk_here)
+	if (!relocated_ramdisk)
 		panic("Cannot find place for new RAMDISK of size %lld\n",
-			 ramdisk_size);
+		      ramdisk_size);
 
 	/* Note: this includes all the mem currently occupied by
 	   the initrd, we rely on that fact to keep the data intact. */
-	memblock_reserve(ramdisk_here, area_size);
-	initrd_start = ramdisk_here + PAGE_OFFSET;
+	memblock_reserve(relocated_ramdisk, area_size);
+	initrd_start = relocated_ramdisk + PAGE_OFFSET;
 	initrd_end   = initrd_start + ramdisk_size;
 	printk(KERN_INFO "Allocated new RAMDISK: [mem %#010llx-%#010llx]\n",
-			 ramdisk_here, ramdisk_here + ramdisk_size - 1);
+	       relocated_ramdisk, relocated_ramdisk + ramdisk_size - 1);
 
 	q = (char *)initrd_start;
 
@@ -363,7 +364,7 @@ static void __init relocate_initrd(void)
 	printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to"
 		" [mem %#010llx-%#010llx]\n",
 		ramdisk_image, ramdisk_image + ramdisk_size - 1,
-		ramdisk_here, ramdisk_here + ramdisk_size - 1);
+		relocated_ramdisk, relocated_ramdisk + ramdisk_size - 1);
 }
 
 static void __init early_reserve_initrd(void)
-- 
cgit v1.2.3


From e1b43e3f13f7157249fb962ccf88b84eb0421fb4 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Wed, 4 Dec 2013 12:31:31 +0100
Subject: x86, microcode: Share native MSR accessing variants

We want to use those in AMD's early loading path too. Also, add a
native_wrmsrl variant.

Signed-off-by: Borislav Petkov <bp@suse.de>
Tested-by: Aravind Gopalakrishnan <Aravind.Gopalakrishnan@amd.com>
---
 arch/x86/include/asm/microcode.h        | 15 +++++++++++++++
 arch/x86/kernel/microcode_intel_early.c | 10 ----------
 2 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index f98bd6625318..b59827e76529 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -1,6 +1,21 @@
 #ifndef _ASM_X86_MICROCODE_H
 #define _ASM_X86_MICROCODE_H
 
+#define native_rdmsr(msr, val1, val2)			\
+do {							\
+	u64 __val = native_read_msr((msr));		\
+	(void)((val1) = (u32)__val);			\
+	(void)((val2) = (u32)(__val >> 32));		\
+} while (0)
+
+#define native_wrmsr(msr, low, high)			\
+	native_write_msr(msr, low, high)
+
+#define native_wrmsrl(msr, val)				\
+	native_write_msr((msr),				\
+			 (u32)((u64)(val)),		\
+			 (u32)((u64)(val) >> 32))
+
 struct cpu_signature {
 	unsigned int sig;
 	unsigned int pf;
diff --git a/arch/x86/kernel/microcode_intel_early.c b/arch/x86/kernel/microcode_intel_early.c
index 1575deb2e636..18f739129e72 100644
--- a/arch/x86/kernel/microcode_intel_early.c
+++ b/arch/x86/kernel/microcode_intel_early.c
@@ -365,16 +365,6 @@ out:
 	return state;
 }
 
-#define native_rdmsr(msr, val1, val2)		\
-do {						\
-	u64 __val = native_read_msr((msr));	\
-	(void)((val1) = (u32)__val);		\
-	(void)((val2) = (u32)(__val >> 32));	\
-} while (0)
-
-#define native_wrmsr(msr, low, high)		\
-	native_write_msr(msr, low, high);
-
 static int collect_cpu_info_early(struct ucode_cpu_info *uci)
 {
 	unsigned int val[2];
-- 
cgit v1.2.3


From 5335ba5cf475369f88db8e6835764efdcad8ab96 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Fri, 29 Nov 2013 14:58:44 +0100
Subject: x86, microcode, AMD: Fix early ucode loading

The original idea to use the microcode cache for the APs doesn't pan out
because we do memory allocation there very early and with IRQs disabled
and we don't want to involve GFP_ATOMIC allocations. Not if it can be
helped.

Thus, extend the caching of the BSP patch approach to the APs and
iterate over the ucode in the initrd instead of using the cache. We
still save the relevant patches to it but later, right before we
jettison the initrd.

While at it, fix early ucode loading on 32-bit too.

Signed-off-by: Borislav Petkov <bp@suse.de>
Tested-by: Aravind Gopalakrishnan <Aravind.Gopalakrishnan@amd.com>
---
 arch/x86/include/asm/microcode_amd.h  |   7 +-
 arch/x86/kernel/microcode_amd.c       |  13 +-
 arch/x86/kernel/microcode_amd_early.c | 239 ++++++++++++++++++++++------------
 3 files changed, 170 insertions(+), 89 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h
index 4c019179a57d..b7b10b82d3e5 100644
--- a/arch/x86/include/asm/microcode_amd.h
+++ b/arch/x86/include/asm/microcode_amd.h
@@ -61,11 +61,10 @@ extern int __apply_microcode_amd(struct microcode_amd *mc_amd);
 extern int apply_microcode_amd(int cpu);
 extern enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size);
 
+#define PATCH_MAX_SIZE PAGE_SIZE
+extern u8 amd_ucode_patch[PATCH_MAX_SIZE];
+
 #ifdef CONFIG_MICROCODE_AMD_EARLY
-#ifdef CONFIG_X86_32
-#define MPB_MAX_SIZE PAGE_SIZE
-extern u8 amd_bsp_mpb[MPB_MAX_SIZE];
-#endif
 extern void __init load_ucode_amd_bsp(void);
 extern void load_ucode_amd_ap(void);
 extern int __init save_microcode_in_initrd_amd(void);
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index c3d4cc972eca..4a6ff747aaad 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -182,10 +182,10 @@ int __apply_microcode_amd(struct microcode_amd *mc_amd)
 {
 	u32 rev, dummy;
 
-	wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
+	native_wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
 
 	/* verify patch application was successful */
-	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
+	native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
 	if (rev != mc_amd->hdr.patch_id)
 		return -1;
 
@@ -332,6 +332,9 @@ static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover)
 	patch->patch_id  = mc_hdr->patch_id;
 	patch->equiv_cpu = proc_id;
 
+	pr_debug("%s: Added patch_id: 0x%08x, proc_id: 0x%04x\n",
+		 __func__, patch->patch_id, proc_id);
+
 	/* ... and add to cache. */
 	update_cache(patch);
 
@@ -390,9 +393,9 @@ enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size)
 	if (cpu_data(smp_processor_id()).cpu_index == boot_cpu_data.cpu_index) {
 		struct ucode_patch *p = find_patch(smp_processor_id());
 		if (p) {
-			memset(amd_bsp_mpb, 0, MPB_MAX_SIZE);
-			memcpy(amd_bsp_mpb, p->data, min_t(u32, ksize(p->data),
-							   MPB_MAX_SIZE));
+			memset(amd_ucode_patch, 0, PATCH_MAX_SIZE);
+			memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data),
+							       PATCH_MAX_SIZE));
 		}
 	}
 #endif
diff --git a/arch/x86/kernel/microcode_amd_early.c b/arch/x86/kernel/microcode_amd_early.c
index 6073104ccaa3..8384c0fa206f 100644
--- a/arch/x86/kernel/microcode_amd_early.c
+++ b/arch/x86/kernel/microcode_amd_early.c
@@ -2,6 +2,7 @@
  * Copyright (C) 2013 Advanced Micro Devices, Inc.
  *
  * Author: Jacob Shin <jacob.shin@amd.com>
+ * Fixes: Borislav Petkov <bp@suse.de>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -15,10 +16,18 @@
 #include <asm/setup.h>
 #include <asm/microcode_amd.h>
 
-static bool ucode_loaded;
+/*
+ * This points to the current valid container of microcode patches which we will
+ * save from the initrd before jettisoning its contents.
+ */
+static u8 *container;
+static size_t container_size;
+
 static u32 ucode_new_rev;
-static unsigned long ucode_offset;
-static size_t ucode_size;
+u8 amd_ucode_patch[PATCH_MAX_SIZE];
+static u16 this_equiv_id;
+
+struct cpio_data ucode_cpio;
 
 /*
  * Microcode patch container file is prepended to the initrd in cpio format.
@@ -32,9 +41,6 @@ static struct cpio_data __init find_ucode_in_initrd(void)
 	char *path;
 	void *start;
 	size_t size;
-	unsigned long *uoffset;
-	size_t *usize;
-	struct cpio_data cd;
 
 #ifdef CONFIG_X86_32
 	struct boot_params *p;
@@ -47,30 +53,50 @@ static struct cpio_data __init find_ucode_in_initrd(void)
 	path    = (char *)__pa_nodebug(ucode_path);
 	start   = (void *)p->hdr.ramdisk_image;
 	size    = p->hdr.ramdisk_size;
-	uoffset = (unsigned long *)__pa_nodebug(&ucode_offset);
-	usize   = (size_t *)__pa_nodebug(&ucode_size);
 #else
 	path    = ucode_path;
 	start   = (void *)(boot_params.hdr.ramdisk_image + PAGE_OFFSET);
 	size    = boot_params.hdr.ramdisk_size;
-	uoffset = &ucode_offset;
-	usize   = &ucode_size;
 #endif
 
-	cd = find_cpio_data(path, start, size, &offset);
-	if (!cd.data)
-		return cd;
+	return find_cpio_data(path, start, size, &offset);
+}
 
-	if (*(u32 *)cd.data != UCODE_MAGIC) {
-		cd.data = NULL;
-		cd.size = 0;
-		return cd;
-	}
+static size_t compute_container_size(u8 *data, u32 total_size)
+{
+	size_t size = 0;
+	u32 *header = (u32 *)data;
 
-	*uoffset = (u8 *)cd.data - (u8 *)start;
-	*usize   = cd.size;
+	if (header[0] != UCODE_MAGIC ||
+	    header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */
+	    header[2] == 0)                            /* size */
+		return size;
 
-	return cd;
+	size = header[2] + CONTAINER_HDR_SZ;
+	total_size -= size;
+	data += size;
+
+	while (total_size) {
+		u16 patch_size;
+
+		header = (u32 *)data;
+
+		if (header[0] != UCODE_UCODE_TYPE)
+			break;
+
+		/*
+		 * Sanity-check patch size.
+		 */
+		patch_size = header[1];
+		if (patch_size > PATCH_MAX_SIZE)
+			break;
+
+		size	   += patch_size + SECTION_HDR_SIZE;
+		data	   += patch_size + SECTION_HDR_SIZE;
+		total_size -= patch_size + SECTION_HDR_SIZE;
+	}
+
+	return size;
 }
 
 /*
@@ -85,23 +111,22 @@ static struct cpio_data __init find_ucode_in_initrd(void)
 static void apply_ucode_in_initrd(void *ucode, size_t size)
 {
 	struct equiv_cpu_entry *eq;
+	size_t *cont_sz;
 	u32 *header;
-	u8  *data;
+	u8  *data, **cont;
 	u16 eq_id = 0;
 	int offset, left;
-	u32 rev, eax;
+	u32 rev, eax, ebx, ecx, edx;
 	u32 *new_rev;
-	unsigned long *uoffset;
-	size_t *usize;
 
 #ifdef CONFIG_X86_32
 	new_rev = (u32 *)__pa_nodebug(&ucode_new_rev);
-	uoffset = (unsigned long *)__pa_nodebug(&ucode_offset);
-	usize   = (size_t *)__pa_nodebug(&ucode_size);
+	cont_sz = (size_t *)__pa_nodebug(&container_size);
+	cont	= (u8 **)__pa_nodebug(&container);
 #else
 	new_rev = &ucode_new_rev;
-	uoffset = &ucode_offset;
-	usize   = &ucode_size;
+	cont_sz = &container_size;
+	cont	= &container;
 #endif
 
 	data   = ucode;
@@ -109,23 +134,37 @@ static void apply_ucode_in_initrd(void *ucode, size_t size)
 	header = (u32 *)data;
 
 	/* find equiv cpu table */
-
-	if (header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */
+	if (header[0] != UCODE_MAGIC ||
+	    header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */
 	    header[2] == 0)                            /* size */
 		return;
 
-	eax = cpuid_eax(0x00000001);
+	eax = 0x00000001;
+	ecx = 0;
+	native_cpuid(&eax, &ebx, &ecx, &edx);
 
 	while (left > 0) {
 		eq = (struct equiv_cpu_entry *)(data + CONTAINER_HDR_SZ);
 
+		*cont = data;
+
+		/* Advance past the container header */
 		offset = header[2] + CONTAINER_HDR_SZ;
 		data  += offset;
 		left  -= offset;
 
 		eq_id = find_equiv_id(eq, eax);
-		if (eq_id)
+		if (eq_id) {
+			this_equiv_id = eq_id;
+			*cont_sz = compute_container_size(*cont, left + offset);
+
+			/*
+			 * truncate how much we need to iterate over in the
+			 * ucode update loop below
+			 */
+			left = *cont_sz - offset;
 			break;
+		}
 
 		/*
 		 * support multiple container files appended together. if this
@@ -145,19 +184,18 @@ static void apply_ucode_in_initrd(void *ucode, size_t size)
 
 		/* mark where the next microcode container file starts */
 		offset    = data - (u8 *)ucode;
-		*uoffset += offset;
-		*usize   -= offset;
 		ucode     = data;
 	}
 
 	if (!eq_id) {
-		*usize = 0;
+		*cont = NULL;
+		*cont_sz = 0;
 		return;
 	}
 
 	/* find ucode and update if needed */
 
-	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax);
+	native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax);
 
 	while (left > 0) {
 		struct microcode_amd *mc;
@@ -168,73 +206,83 @@ static void apply_ucode_in_initrd(void *ucode, size_t size)
 			break;
 
 		mc = (struct microcode_amd *)(data + SECTION_HDR_SIZE);
-		if (eq_id == mc->hdr.processor_rev_id && rev < mc->hdr.patch_id)
-			if (__apply_microcode_amd(mc) == 0) {
+
+		if (eq_id == mc->hdr.processor_rev_id && rev < mc->hdr.patch_id) {
+
+			if (!__apply_microcode_amd(mc)) {
 				rev = mc->hdr.patch_id;
 				*new_rev = rev;
+
+				/* save ucode patch */
+				memcpy(amd_ucode_patch, mc,
+				       min_t(u32, header[1], PATCH_MAX_SIZE));
 			}
+		}
 
 		offset  = header[1] + SECTION_HDR_SIZE;
 		data   += offset;
 		left   -= offset;
 	}
-
-	/* mark where this microcode container file ends */
-	offset  = *usize - (data - (u8 *)ucode);
-	*usize -= offset;
-
-	if (!(*new_rev))
-		*usize = 0;
 }
 
 void __init load_ucode_amd_bsp(void)
 {
-	struct cpio_data cd = find_ucode_in_initrd();
-	if (!cd.data)
+	struct cpio_data cp;
+	void **data;
+	size_t *size;
+
+#ifdef CONFIG_X86_32
+	data =  (void **)__pa_nodebug(&ucode_cpio.data);
+	size = (size_t *)__pa_nodebug(&ucode_cpio.size);
+#else
+	data = &ucode_cpio.data;
+	size = &ucode_cpio.size;
+#endif
+
+	cp = find_ucode_in_initrd();
+	if (!cp.data)
 		return;
 
-	apply_ucode_in_initrd(cd.data, cd.size);
+	*data = cp.data;
+	*size = cp.size;
+
+	apply_ucode_in_initrd(cp.data, cp.size);
 }
 
 #ifdef CONFIG_X86_32
-u8 amd_bsp_mpb[MPB_MAX_SIZE];
-
 /*
  * On 32-bit, since AP's early load occurs before paging is turned on, we
  * cannot traverse cpu_equiv_table and pcache in kernel heap memory. So during
  * cold boot, AP will apply_ucode_in_initrd() just like the BSP. During
- * save_microcode_in_initrd_amd() BSP's patch is copied to amd_bsp_mpb, which
- * is used upon resume from suspend.
+ * save_microcode_in_initrd_amd() BSP's patch is copied to amd_ucode_patch,
+ * which is used upon resume from suspend.
  */
 void load_ucode_amd_ap(void)
 {
 	struct microcode_amd *mc;
-	unsigned long *initrd;
-	unsigned long *uoffset;
 	size_t *usize;
-	void *ucode;
+	void **ucode;
 
-	mc = (struct microcode_amd *)__pa(amd_bsp_mpb);
+	mc = (struct microcode_amd *)__pa(amd_ucode_patch);
 	if (mc->hdr.patch_id && mc->hdr.processor_rev_id) {
 		__apply_microcode_amd(mc);
 		return;
 	}
 
-	initrd  = (unsigned long *)__pa(&initrd_start);
-	uoffset = (unsigned long *)__pa(&ucode_offset);
-	usize   = (size_t *)__pa(&ucode_size);
+	ucode = (void *)__pa_nodebug(&container);
+	usize = (size_t *)__pa_nodebug(&container_size);
 
-	if (!*usize || !*initrd)
+	if (!*ucode || !*usize)
 		return;
 
-	ucode = (void *)((unsigned long)__pa(*initrd) + *uoffset);
-	apply_ucode_in_initrd(ucode, *usize);
+	apply_ucode_in_initrd(*ucode, *usize);
 }
 
 static void __init collect_cpu_sig_on_bsp(void *arg)
 {
 	unsigned int cpu = smp_processor_id();
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
 	uci->cpu_sig.sig = cpuid_eax(0x00000001);
 }
 #else
@@ -242,36 +290,54 @@ void load_ucode_amd_ap(void)
 {
 	unsigned int cpu = smp_processor_id();
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	struct equiv_cpu_entry *eq;
+	struct microcode_amd *mc;
 	u32 rev, eax;
+	u16 eq_id;
+
+	/* Exit if called on the BSP. */
+	if (!cpu)
+		return;
+
+	if (!container)
+		return;
 
 	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax);
-	eax = cpuid_eax(0x00000001);
 
 	uci->cpu_sig.rev = rev;
 	uci->cpu_sig.sig = eax;
 
-	if (cpu && !ucode_loaded) {
-		void *ucode;
+	eax = cpuid_eax(0x00000001);
+	eq  = (struct equiv_cpu_entry *)(container + CONTAINER_HDR_SZ);
 
-		if (!ucode_size || !initrd_start)
-			return;
+	eq_id = find_equiv_id(eq, eax);
+	if (!eq_id)
+		return;
+
+	if (eq_id == this_equiv_id) {
+		mc = (struct microcode_amd *)amd_ucode_patch;
 
-		ucode = (void *)(initrd_start + ucode_offset);
-		eax   = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
-		if (load_microcode_amd(eax, ucode, ucode_size) != UCODE_OK)
+		if (mc && rev < mc->hdr.patch_id) {
+			if (!__apply_microcode_amd(mc))
+				ucode_new_rev = mc->hdr.patch_id;
+		}
+
+	} else {
+		if (!ucode_cpio.data)
 			return;
 
-		ucode_loaded = true;
+		/*
+		 * AP has a different equivalence ID than BSP, looks like
+		 * mixed-steppings silicon so go through the ucode blob anew.
+		 */
+		apply_ucode_in_initrd(ucode_cpio.data, ucode_cpio.size);
 	}
-
-	apply_microcode_amd(cpu);
 }
 #endif
 
 int __init save_microcode_in_initrd_amd(void)
 {
 	enum ucode_state ret;
-	void *ucode;
 	u32 eax;
 
 #ifdef CONFIG_X86_32
@@ -280,22 +346,35 @@ int __init save_microcode_in_initrd_amd(void)
 
 	if (!uci->cpu_sig.sig)
 		smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1);
+
+	/*
+	 * Take into account the fact that the ramdisk might get relocated
+	 * and therefore we need to recompute the container's position in
+	 * virtual memory space.
+	 */
+	container = (u8 *)(__va((u32)relocated_ramdisk) +
+			   ((u32)container - boot_params.hdr.ramdisk_image));
 #endif
 	if (ucode_new_rev)
 		pr_info("microcode: updated early to new patch_level=0x%08x\n",
 			ucode_new_rev);
 
-	if (ucode_loaded || !ucode_size || !initrd_start)
-		return 0;
+	if (!container)
+		return -EINVAL;
 
-	ucode = (void *)(initrd_start + ucode_offset);
 	eax   = cpuid_eax(0x00000001);
 	eax   = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
 
-	ret = load_microcode_amd(eax, ucode, ucode_size);
+	ret = load_microcode_amd(eax, container, container_size);
 	if (ret != UCODE_OK)
 		return -EINVAL;
 
-	ucode_loaded = true;
+	/*
+	 * This will be freed any msec now, stash patches for the current
+	 * family and switch to patch cache for cpu hotplug, etc later.
+	 */
+	container = NULL;
+	container_size = 0;
+
 	return 0;
 }
-- 
cgit v1.2.3


From bad5fa631fca5466401cd4a48e30cc1f1cb6101e Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Sun, 1 Dec 2013 18:09:58 +0100
Subject: x86, microcode: Move to a proper location

We've grown a bunch of microcode loader files all prefixed with
"microcode_". They should be under cpu/ because this is strictly
CPU-related functionality so do that and drop the prefix since they're
in their own directory now which gives that prefix. :)

While at it, drop MICROCODE_INTEL_LIB config item and stash the
functionality under CONFIG_MICROCODE_INTEL as it was its only user.

Signed-off-by: Borislav Petkov <bp@suse.de>
Tested-by: Aravind Gopalakrishnan <Aravind.Gopalakrishnan@amd.com>
---
 arch/x86/Kconfig                            |   4 -
 arch/x86/kernel/Makefile                    |   9 -
 arch/x86/kernel/cpu/Makefile                |   1 +
 arch/x86/kernel/cpu/microcode/Makefile      |   7 +
 arch/x86/kernel/cpu/microcode/amd.c         | 492 +++++++++++++++++
 arch/x86/kernel/cpu/microcode/amd_early.c   | 380 ++++++++++++++
 arch/x86/kernel/cpu/microcode/core.c        | 645 +++++++++++++++++++++++
 arch/x86/kernel/cpu/microcode/core_early.c  | 141 +++++
 arch/x86/kernel/cpu/microcode/intel.c       | 333 ++++++++++++
 arch/x86/kernel/cpu/microcode/intel_early.c | 787 ++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/microcode/intel_lib.c   | 174 ++++++
 arch/x86/kernel/microcode_amd.c             | 492 -----------------
 arch/x86/kernel/microcode_amd_early.c       | 380 --------------
 arch/x86/kernel/microcode_core.c            | 645 -----------------------
 arch/x86/kernel/microcode_core_early.c      | 141 -----
 arch/x86/kernel/microcode_intel.c           | 333 ------------
 arch/x86/kernel/microcode_intel_early.c     | 787 ----------------------------
 arch/x86/kernel/microcode_intel_lib.c       | 174 ------
 18 files changed, 2960 insertions(+), 2965 deletions(-)
 create mode 100644 arch/x86/kernel/cpu/microcode/Makefile
 create mode 100644 arch/x86/kernel/cpu/microcode/amd.c
 create mode 100644 arch/x86/kernel/cpu/microcode/amd_early.c
 create mode 100644 arch/x86/kernel/cpu/microcode/core.c
 create mode 100644 arch/x86/kernel/cpu/microcode/core_early.c
 create mode 100644 arch/x86/kernel/cpu/microcode/intel.c
 create mode 100644 arch/x86/kernel/cpu/microcode/intel_early.c
 create mode 100644 arch/x86/kernel/cpu/microcode/intel_lib.c
 delete mode 100644 arch/x86/kernel/microcode_amd.c
 delete mode 100644 arch/x86/kernel/microcode_amd_early.c
 delete mode 100644 arch/x86/kernel/microcode_core.c
 delete mode 100644 arch/x86/kernel/microcode_core_early.c
 delete mode 100644 arch/x86/kernel/microcode_intel.c
 delete mode 100644 arch/x86/kernel/microcode_intel_early.c
 delete mode 100644 arch/x86/kernel/microcode_intel_lib.c

(limited to 'arch')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0952ecd60eca..01bfb9e6ba55 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1080,10 +1080,6 @@ config MICROCODE_OLD_INTERFACE
 	def_bool y
 	depends on MICROCODE
 
-config MICROCODE_INTEL_LIB
-	def_bool y
-	depends on MICROCODE_INTEL
-
 config MICROCODE_INTEL_EARLY
 	def_bool n
 
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 9b0a34e2cd79..4d0094dbb6c9 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -91,15 +91,6 @@ obj-$(CONFIG_PARAVIRT_CLOCK)	+= pvclock.o
 
 obj-$(CONFIG_PCSPKR_PLATFORM)	+= pcspeaker.o
 
-obj-$(CONFIG_MICROCODE_EARLY)		+= microcode_core_early.o
-obj-$(CONFIG_MICROCODE_INTEL_EARLY)	+= microcode_intel_early.o
-obj-$(CONFIG_MICROCODE_INTEL_LIB)	+= microcode_intel_lib.o
-microcode-y				:= microcode_core.o
-microcode-$(CONFIG_MICROCODE_INTEL)	+= microcode_intel.o
-microcode-$(CONFIG_MICROCODE_AMD)	+= microcode_amd.o
-obj-$(CONFIG_MICROCODE_AMD_EARLY)	+= microcode_amd_early.o
-obj-$(CONFIG_MICROCODE)			+= microcode.o
-
 obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
 
 obj-$(CONFIG_SWIOTLB)			+= pci-swiotlb.o
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 47b56a7e99cb..0710eeffcf51 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -42,6 +42,7 @@ endif
 
 obj-$(CONFIG_X86_MCE)			+= mcheck/
 obj-$(CONFIG_MTRR)			+= mtrr/
+obj-$(CONFIG_MICROCODE)			+= microcode/
 
 obj-$(CONFIG_X86_LOCAL_APIC)		+= perfctr-watchdog.o perf_event_amd_ibs.o
 
diff --git a/arch/x86/kernel/cpu/microcode/Makefile b/arch/x86/kernel/cpu/microcode/Makefile
new file mode 100644
index 000000000000..285c85427c32
--- /dev/null
+++ b/arch/x86/kernel/cpu/microcode/Makefile
@@ -0,0 +1,7 @@
+microcode-y				:= core.o
+obj-$(CONFIG_MICROCODE)			+= microcode.o
+microcode-$(CONFIG_MICROCODE_INTEL)	+= intel.o intel_lib.o
+microcode-$(CONFIG_MICROCODE_AMD)	+= amd.o
+obj-$(CONFIG_MICROCODE_EARLY)		+= core_early.o
+obj-$(CONFIG_MICROCODE_INTEL_EARLY)	+= intel_early.o
+obj-$(CONFIG_MICROCODE_AMD_EARLY)	+= amd_early.o
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
new file mode 100644
index 000000000000..4a6ff747aaad
--- /dev/null
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -0,0 +1,492 @@
+/*
+ *  AMD CPU Microcode Update Driver for Linux
+ *  Copyright (C) 2008-2011 Advanced Micro Devices Inc.
+ *
+ *  Author: Peter Oruba <peter.oruba@amd.com>
+ *
+ *  Based on work by:
+ *  Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+ *
+ *  Maintainers:
+ *  Andreas Herrmann <herrmann.der.user@googlemail.com>
+ *  Borislav Petkov <bp@alien8.de>
+ *
+ *  This driver allows to upgrade microcode on F10h AMD
+ *  CPUs and later.
+ *
+ *  Licensed under the terms of the GNU General Public
+ *  License version 2. See file COPYING for details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/firmware.h>
+#include <linux/pci_ids.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include <asm/microcode.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/microcode_amd.h>
+
+MODULE_DESCRIPTION("AMD Microcode Update Driver");
+MODULE_AUTHOR("Peter Oruba");
+MODULE_LICENSE("GPL v2");
+
+static struct equiv_cpu_entry *equiv_cpu_table;
+
+struct ucode_patch {
+	struct list_head plist;
+	void *data;
+	u32 patch_id;
+	u16 equiv_cpu;
+};
+
+static LIST_HEAD(pcache);
+
+static u16 __find_equiv_id(unsigned int cpu)
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	return find_equiv_id(equiv_cpu_table, uci->cpu_sig.sig);
+}
+
+static u32 find_cpu_family_by_equiv_cpu(u16 equiv_cpu)
+{
+	int i = 0;
+
+	BUG_ON(!equiv_cpu_table);
+
+	while (equiv_cpu_table[i].equiv_cpu != 0) {
+		if (equiv_cpu == equiv_cpu_table[i].equiv_cpu)
+			return equiv_cpu_table[i].installed_cpu;
+		i++;
+	}
+	return 0;
+}
+
+/*
+ * a small, trivial cache of per-family ucode patches
+ */
+static struct ucode_patch *cache_find_patch(u16 equiv_cpu)
+{
+	struct ucode_patch *p;
+
+	list_for_each_entry(p, &pcache, plist)
+		if (p->equiv_cpu == equiv_cpu)
+			return p;
+	return NULL;
+}
+
+static void update_cache(struct ucode_patch *new_patch)
+{
+	struct ucode_patch *p;
+
+	list_for_each_entry(p, &pcache, plist) {
+		if (p->equiv_cpu == new_patch->equiv_cpu) {
+			if (p->patch_id >= new_patch->patch_id)
+				/* we already have the latest patch */
+				return;
+
+			list_replace(&p->plist, &new_patch->plist);
+			kfree(p->data);
+			kfree(p);
+			return;
+		}
+	}
+	/* no patch found, add it */
+	list_add_tail(&new_patch->plist, &pcache);
+}
+
+static void free_cache(void)
+{
+	struct ucode_patch *p, *tmp;
+
+	list_for_each_entry_safe(p, tmp, &pcache, plist) {
+		__list_del(p->plist.prev, p->plist.next);
+		kfree(p->data);
+		kfree(p);
+	}
+}
+
+static struct ucode_patch *find_patch(unsigned int cpu)
+{
+	u16 equiv_id;
+
+	equiv_id = __find_equiv_id(cpu);
+	if (!equiv_id)
+		return NULL;
+
+	return cache_find_patch(equiv_id);
+}
+
+static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
+{
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	struct ucode_patch *p;
+
+	csig->sig = cpuid_eax(0x00000001);
+	csig->rev = c->microcode;
+
+	/*
+	 * a patch could have been loaded early, set uci->mc so that
+	 * mc_bp_resume() can call apply_microcode()
+	 */
+	p = find_patch(cpu);
+	if (p && (p->patch_id == csig->rev))
+		uci->mc = p->data;
+
+	pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev);
+
+	return 0;
+}
+
+static unsigned int verify_patch_size(u8 family, u32 patch_size,
+				      unsigned int size)
+{
+	u32 max_size;
+
+#define F1XH_MPB_MAX_SIZE 2048
+#define F14H_MPB_MAX_SIZE 1824
+#define F15H_MPB_MAX_SIZE 4096
+#define F16H_MPB_MAX_SIZE 3458
+
+	switch (family) {
+	case 0x14:
+		max_size = F14H_MPB_MAX_SIZE;
+		break;
+	case 0x15:
+		max_size = F15H_MPB_MAX_SIZE;
+		break;
+	case 0x16:
+		max_size = F16H_MPB_MAX_SIZE;
+		break;
+	default:
+		max_size = F1XH_MPB_MAX_SIZE;
+		break;
+	}
+
+	if (patch_size > min_t(u32, size, max_size)) {
+		pr_err("patch size mismatch\n");
+		return 0;
+	}
+
+	return patch_size;
+}
+
+int __apply_microcode_amd(struct microcode_amd *mc_amd)
+{
+	u32 rev, dummy;
+
+	native_wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
+
+	/* verify patch application was successful */
+	native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
+	if (rev != mc_amd->hdr.patch_id)
+		return -1;
+
+	return 0;
+}
+
+int apply_microcode_amd(int cpu)
+{
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	struct microcode_amd *mc_amd;
+	struct ucode_cpu_info *uci;
+	struct ucode_patch *p;
+	u32 rev, dummy;
+
+	BUG_ON(raw_smp_processor_id() != cpu);
+
+	uci = ucode_cpu_info + cpu;
+
+	p = find_patch(cpu);
+	if (!p)
+		return 0;
+
+	mc_amd  = p->data;
+	uci->mc = p->data;
+
+	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
+
+	/* need to apply patch? */
+	if (rev >= mc_amd->hdr.patch_id) {
+		c->microcode = rev;
+		uci->cpu_sig.rev = rev;
+		return 0;
+	}
+
+	if (__apply_microcode_amd(mc_amd)) {
+		pr_err("CPU%d: update failed for patch_level=0x%08x\n",
+			cpu, mc_amd->hdr.patch_id);
+		return -1;
+	}
+	pr_info("CPU%d: new patch_level=0x%08x\n", cpu,
+		mc_amd->hdr.patch_id);
+
+	uci->cpu_sig.rev = mc_amd->hdr.patch_id;
+	c->microcode = mc_amd->hdr.patch_id;
+
+	return 0;
+}
+
+static int install_equiv_cpu_table(const u8 *buf)
+{
+	unsigned int *ibuf = (unsigned int *)buf;
+	unsigned int type = ibuf[1];
+	unsigned int size = ibuf[2];
+
+	if (type != UCODE_EQUIV_CPU_TABLE_TYPE || !size) {
+		pr_err("empty section/"
+		       "invalid type field in container file section header\n");
+		return -EINVAL;
+	}
+
+	equiv_cpu_table = vmalloc(size);
+	if (!equiv_cpu_table) {
+		pr_err("failed to allocate equivalent CPU table\n");
+		return -ENOMEM;
+	}
+
+	memcpy(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size);
+
+	/* add header length */
+	return size + CONTAINER_HDR_SZ;
+}
+
+static void free_equiv_cpu_table(void)
+{
+	vfree(equiv_cpu_table);
+	equiv_cpu_table = NULL;
+}
+
+static void cleanup(void)
+{
+	free_equiv_cpu_table();
+	free_cache();
+}
+
+/*
+ * We return the current size even if some of the checks failed so that
+ * we can skip over the next patch. If we return a negative value, we
+ * signal a grave error like a memory allocation has failed and the
+ * driver cannot continue functioning normally. In such cases, we tear
+ * down everything we've used up so far and exit.
+ */
+static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover)
+{
+	struct microcode_header_amd *mc_hdr;
+	struct ucode_patch *patch;
+	unsigned int patch_size, crnt_size, ret;
+	u32 proc_fam;
+	u16 proc_id;
+
+	patch_size  = *(u32 *)(fw + 4);
+	crnt_size   = patch_size + SECTION_HDR_SIZE;
+	mc_hdr	    = (struct microcode_header_amd *)(fw + SECTION_HDR_SIZE);
+	proc_id	    = mc_hdr->processor_rev_id;
+
+	proc_fam = find_cpu_family_by_equiv_cpu(proc_id);
+	if (!proc_fam) {
+		pr_err("No patch family for equiv ID: 0x%04x\n", proc_id);
+		return crnt_size;
+	}
+
+	/* check if patch is for the current family */
+	proc_fam = ((proc_fam >> 8) & 0xf) + ((proc_fam >> 20) & 0xff);
+	if (proc_fam != family)
+		return crnt_size;
+
+	if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) {
+		pr_err("Patch-ID 0x%08x: chipset-specific code unsupported.\n",
+			mc_hdr->patch_id);
+		return crnt_size;
+	}
+
+	ret = verify_patch_size(family, patch_size, leftover);
+	if (!ret) {
+		pr_err("Patch-ID 0x%08x: size mismatch.\n", mc_hdr->patch_id);
+		return crnt_size;
+	}
+
+	patch = kzalloc(sizeof(*patch), GFP_KERNEL);
+	if (!patch) {
+		pr_err("Patch allocation failure.\n");
+		return -EINVAL;
+	}
+
+	patch->data = kzalloc(patch_size, GFP_KERNEL);
+	if (!patch->data) {
+		pr_err("Patch data allocation failure.\n");
+		kfree(patch);
+		return -EINVAL;
+	}
+
+	/* All looks ok, copy patch... */
+	memcpy(patch->data, fw + SECTION_HDR_SIZE, patch_size);
+	INIT_LIST_HEAD(&patch->plist);
+	patch->patch_id  = mc_hdr->patch_id;
+	patch->equiv_cpu = proc_id;
+
+	pr_debug("%s: Added patch_id: 0x%08x, proc_id: 0x%04x\n",
+		 __func__, patch->patch_id, proc_id);
+
+	/* ... and add to cache. */
+	update_cache(patch);
+
+	return crnt_size;
+}
+
+static enum ucode_state __load_microcode_amd(u8 family, const u8 *data,
+					     size_t size)
+{
+	enum ucode_state ret = UCODE_ERROR;
+	unsigned int leftover;
+	u8 *fw = (u8 *)data;
+	int crnt_size = 0;
+	int offset;
+
+	offset = install_equiv_cpu_table(data);
+	if (offset < 0) {
+		pr_err("failed to create equivalent cpu table\n");
+		return ret;
+	}
+	fw += offset;
+	leftover = size - offset;
+
+	if (*(u32 *)fw != UCODE_UCODE_TYPE) {
+		pr_err("invalid type field in container file section header\n");
+		free_equiv_cpu_table();
+		return ret;
+	}
+
+	while (leftover) {
+		crnt_size = verify_and_add_patch(family, fw, leftover);
+		if (crnt_size < 0)
+			return ret;
+
+		fw	 += crnt_size;
+		leftover -= crnt_size;
+	}
+
+	return UCODE_OK;
+}
+
+enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size)
+{
+	enum ucode_state ret;
+
+	/* free old equiv table */
+	free_equiv_cpu_table();
+
+	ret = __load_microcode_amd(family, data, size);
+
+	if (ret != UCODE_OK)
+		cleanup();
+
+#if defined(CONFIG_MICROCODE_AMD_EARLY) && defined(CONFIG_X86_32)
+	/* save BSP's matching patch for early load */
+	if (cpu_data(smp_processor_id()).cpu_index == boot_cpu_data.cpu_index) {
+		struct ucode_patch *p = find_patch(smp_processor_id());
+		if (p) {
+			memset(amd_ucode_patch, 0, PATCH_MAX_SIZE);
+			memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data),
+							       PATCH_MAX_SIZE));
+		}
+	}
+#endif
+	return ret;
+}
+
+/*
+ * AMD microcode firmware naming convention, up to family 15h they are in
+ * the legacy file:
+ *
+ *    amd-ucode/microcode_amd.bin
+ *
+ * This legacy file is always smaller than 2K in size.
+ *
+ * Beginning with family 15h, they are in family-specific firmware files:
+ *
+ *    amd-ucode/microcode_amd_fam15h.bin
+ *    amd-ucode/microcode_amd_fam16h.bin
+ *    ...
+ *
+ * These might be larger than 2K.
+ */
+static enum ucode_state request_microcode_amd(int cpu, struct device *device,
+					      bool refresh_fw)
+{
+	char fw_name[36] = "amd-ucode/microcode_amd.bin";
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	enum ucode_state ret = UCODE_NFOUND;
+	const struct firmware *fw;
+
+	/* reload ucode container only on the boot cpu */
+	if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index)
+		return UCODE_OK;
+
+	if (c->x86 >= 0x15)
+		snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86);
+
+	if (request_firmware(&fw, (const char *)fw_name, device)) {
+		pr_debug("failed to load file %s\n", fw_name);
+		goto out;
+	}
+
+	ret = UCODE_ERROR;
+	if (*(u32 *)fw->data != UCODE_MAGIC) {
+		pr_err("invalid magic value (0x%08x)\n", *(u32 *)fw->data);
+		goto fw_release;
+	}
+
+	ret = load_microcode_amd(c->x86, fw->data, fw->size);
+
+ fw_release:
+	release_firmware(fw);
+
+ out:
+	return ret;
+}
+
+static enum ucode_state
+request_microcode_user(int cpu, const void __user *buf, size_t size)
+{
+	return UCODE_ERROR;
+}
+
+static void microcode_fini_cpu_amd(int cpu)
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+	uci->mc = NULL;
+}
+
+static struct microcode_ops microcode_amd_ops = {
+	.request_microcode_user           = request_microcode_user,
+	.request_microcode_fw             = request_microcode_amd,
+	.collect_cpu_info                 = collect_cpu_info_amd,
+	.apply_microcode                  = apply_microcode_amd,
+	.microcode_fini_cpu               = microcode_fini_cpu_amd,
+};
+
+struct microcode_ops * __init init_amd_microcode(void)
+{
+	struct cpuinfo_x86 *c = &cpu_data(0);
+
+	if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
+		pr_warning("AMD CPU family 0x%x not supported\n", c->x86);
+		return NULL;
+	}
+
+	return &microcode_amd_ops;
+}
+
+void __exit exit_amd_microcode(void)
+{
+	cleanup();
+}
diff --git a/arch/x86/kernel/cpu/microcode/amd_early.c b/arch/x86/kernel/cpu/microcode/amd_early.c
new file mode 100644
index 000000000000..8384c0fa206f
--- /dev/null
+++ b/arch/x86/kernel/cpu/microcode/amd_early.c
@@ -0,0 +1,380 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Jacob Shin <jacob.shin@amd.com>
+ * Fixes: Borislav Petkov <bp@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/earlycpio.h>
+#include <linux/initrd.h>
+
+#include <asm/cpu.h>
+#include <asm/setup.h>
+#include <asm/microcode_amd.h>
+
+/*
+ * This points to the current valid container of microcode patches which we will
+ * save from the initrd before jettisoning its contents.
+ */
+static u8 *container;
+static size_t container_size;
+
+static u32 ucode_new_rev;
+u8 amd_ucode_patch[PATCH_MAX_SIZE];
+static u16 this_equiv_id;
+
+struct cpio_data ucode_cpio;
+
+/*
+ * Microcode patch container file is prepended to the initrd in cpio format.
+ * See Documentation/x86/early-microcode.txt
+ */
+static __initdata char ucode_path[] = "kernel/x86/microcode/AuthenticAMD.bin";
+
+static struct cpio_data __init find_ucode_in_initrd(void)
+{
+	long offset = 0;
+	char *path;
+	void *start;
+	size_t size;
+
+#ifdef CONFIG_X86_32
+	struct boot_params *p;
+
+	/*
+	 * On 32-bit, early load occurs before paging is turned on so we need
+	 * to use physical addresses.
+	 */
+	p       = (struct boot_params *)__pa_nodebug(&boot_params);
+	path    = (char *)__pa_nodebug(ucode_path);
+	start   = (void *)p->hdr.ramdisk_image;
+	size    = p->hdr.ramdisk_size;
+#else
+	path    = ucode_path;
+	start   = (void *)(boot_params.hdr.ramdisk_image + PAGE_OFFSET);
+	size    = boot_params.hdr.ramdisk_size;
+#endif
+
+	return find_cpio_data(path, start, size, &offset);
+}
+
+static size_t compute_container_size(u8 *data, u32 total_size)
+{
+	size_t size = 0;
+	u32 *header = (u32 *)data;
+
+	if (header[0] != UCODE_MAGIC ||
+	    header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */
+	    header[2] == 0)                            /* size */
+		return size;
+
+	size = header[2] + CONTAINER_HDR_SZ;
+	total_size -= size;
+	data += size;
+
+	while (total_size) {
+		u16 patch_size;
+
+		header = (u32 *)data;
+
+		if (header[0] != UCODE_UCODE_TYPE)
+			break;
+
+		/*
+		 * Sanity-check patch size.
+		 */
+		patch_size = header[1];
+		if (patch_size > PATCH_MAX_SIZE)
+			break;
+
+		size	   += patch_size + SECTION_HDR_SIZE;
+		data	   += patch_size + SECTION_HDR_SIZE;
+		total_size -= patch_size + SECTION_HDR_SIZE;
+	}
+
+	return size;
+}
+
+/*
+ * Early load occurs before we can vmalloc(). So we look for the microcode
+ * patch container file in initrd, traverse equivalent cpu table, look for a
+ * matching microcode patch, and update, all in initrd memory in place.
+ * When vmalloc() is available for use later -- on 64-bit during first AP load,
+ * and on 32-bit during save_microcode_in_initrd_amd() -- we can call
+ * load_microcode_amd() to save equivalent cpu table and microcode patches in
+ * kernel heap memory.
+ */
+static void apply_ucode_in_initrd(void *ucode, size_t size)
+{
+	struct equiv_cpu_entry *eq;
+	size_t *cont_sz;
+	u32 *header;
+	u8  *data, **cont;
+	u16 eq_id = 0;
+	int offset, left;
+	u32 rev, eax, ebx, ecx, edx;
+	u32 *new_rev;
+
+#ifdef CONFIG_X86_32
+	new_rev = (u32 *)__pa_nodebug(&ucode_new_rev);
+	cont_sz = (size_t *)__pa_nodebug(&container_size);
+	cont	= (u8 **)__pa_nodebug(&container);
+#else
+	new_rev = &ucode_new_rev;
+	cont_sz = &container_size;
+	cont	= &container;
+#endif
+
+	data   = ucode;
+	left   = size;
+	header = (u32 *)data;
+
+	/* find equiv cpu table */
+	if (header[0] != UCODE_MAGIC ||
+	    header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */
+	    header[2] == 0)                            /* size */
+		return;
+
+	eax = 0x00000001;
+	ecx = 0;
+	native_cpuid(&eax, &ebx, &ecx, &edx);
+
+	while (left > 0) {
+		eq = (struct equiv_cpu_entry *)(data + CONTAINER_HDR_SZ);
+
+		*cont = data;
+
+		/* Advance past the container header */
+		offset = header[2] + CONTAINER_HDR_SZ;
+		data  += offset;
+		left  -= offset;
+
+		eq_id = find_equiv_id(eq, eax);
+		if (eq_id) {
+			this_equiv_id = eq_id;
+			*cont_sz = compute_container_size(*cont, left + offset);
+
+			/*
+			 * truncate how much we need to iterate over in the
+			 * ucode update loop below
+			 */
+			left = *cont_sz - offset;
+			break;
+		}
+
+		/*
+		 * support multiple container files appended together. if this
+		 * one does not have a matching equivalent cpu entry, we fast
+		 * forward to the next container file.
+		 */
+		while (left > 0) {
+			header = (u32 *)data;
+			if (header[0] == UCODE_MAGIC &&
+			    header[1] == UCODE_EQUIV_CPU_TABLE_TYPE)
+				break;
+
+			offset = header[1] + SECTION_HDR_SIZE;
+			data  += offset;
+			left  -= offset;
+		}
+
+		/* mark where the next microcode container file starts */
+		offset    = data - (u8 *)ucode;
+		ucode     = data;
+	}
+
+	if (!eq_id) {
+		*cont = NULL;
+		*cont_sz = 0;
+		return;
+	}
+
+	/* find ucode and update if needed */
+
+	native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax);
+
+	while (left > 0) {
+		struct microcode_amd *mc;
+
+		header = (u32 *)data;
+		if (header[0] != UCODE_UCODE_TYPE || /* type */
+		    header[1] == 0)                  /* size */
+			break;
+
+		mc = (struct microcode_amd *)(data + SECTION_HDR_SIZE);
+
+		if (eq_id == mc->hdr.processor_rev_id && rev < mc->hdr.patch_id) {
+
+			if (!__apply_microcode_amd(mc)) {
+				rev = mc->hdr.patch_id;
+				*new_rev = rev;
+
+				/* save ucode patch */
+				memcpy(amd_ucode_patch, mc,
+				       min_t(u32, header[1], PATCH_MAX_SIZE));
+			}
+		}
+
+		offset  = header[1] + SECTION_HDR_SIZE;
+		data   += offset;
+		left   -= offset;
+	}
+}
+
+void __init load_ucode_amd_bsp(void)
+{
+	struct cpio_data cp;
+	void **data;
+	size_t *size;
+
+#ifdef CONFIG_X86_32
+	data =  (void **)__pa_nodebug(&ucode_cpio.data);
+	size = (size_t *)__pa_nodebug(&ucode_cpio.size);
+#else
+	data = &ucode_cpio.data;
+	size = &ucode_cpio.size;
+#endif
+
+	cp = find_ucode_in_initrd();
+	if (!cp.data)
+		return;
+
+	*data = cp.data;
+	*size = cp.size;
+
+	apply_ucode_in_initrd(cp.data, cp.size);
+}
+
+#ifdef CONFIG_X86_32
+/*
+ * On 32-bit, since AP's early load occurs before paging is turned on, we
+ * cannot traverse cpu_equiv_table and pcache in kernel heap memory. So during
+ * cold boot, AP will apply_ucode_in_initrd() just like the BSP. During
+ * save_microcode_in_initrd_amd() BSP's patch is copied to amd_ucode_patch,
+ * which is used upon resume from suspend.
+ */
+void load_ucode_amd_ap(void)
+{
+	struct microcode_amd *mc;
+	size_t *usize;
+	void **ucode;
+
+	mc = (struct microcode_amd *)__pa(amd_ucode_patch);
+	if (mc->hdr.patch_id && mc->hdr.processor_rev_id) {
+		__apply_microcode_amd(mc);
+		return;
+	}
+
+	ucode = (void *)__pa_nodebug(&container);
+	usize = (size_t *)__pa_nodebug(&container_size);
+
+	if (!*ucode || !*usize)
+		return;
+
+	apply_ucode_in_initrd(*ucode, *usize);
+}
+
+static void __init collect_cpu_sig_on_bsp(void *arg)
+{
+	unsigned int cpu = smp_processor_id();
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+	uci->cpu_sig.sig = cpuid_eax(0x00000001);
+}
+#else
+void load_ucode_amd_ap(void)
+{
+	unsigned int cpu = smp_processor_id();
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	struct equiv_cpu_entry *eq;
+	struct microcode_amd *mc;
+	u32 rev, eax;
+	u16 eq_id;
+
+	/* Exit if called on the BSP. */
+	if (!cpu)
+		return;
+
+	if (!container)
+		return;
+
+	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax);
+
+	uci->cpu_sig.rev = rev;
+	uci->cpu_sig.sig = eax;
+
+	eax = cpuid_eax(0x00000001);
+	eq  = (struct equiv_cpu_entry *)(container + CONTAINER_HDR_SZ);
+
+	eq_id = find_equiv_id(eq, eax);
+	if (!eq_id)
+		return;
+
+	if (eq_id == this_equiv_id) {
+		mc = (struct microcode_amd *)amd_ucode_patch;
+
+		if (mc && rev < mc->hdr.patch_id) {
+			if (!__apply_microcode_amd(mc))
+				ucode_new_rev = mc->hdr.patch_id;
+		}
+
+	} else {
+		if (!ucode_cpio.data)
+			return;
+
+		/*
+		 * AP has a different equivalence ID than BSP, looks like
+		 * mixed-steppings silicon so go through the ucode blob anew.
+		 */
+		apply_ucode_in_initrd(ucode_cpio.data, ucode_cpio.size);
+	}
+}
+#endif
+
+int __init save_microcode_in_initrd_amd(void)
+{
+	enum ucode_state ret;
+	u32 eax;
+
+#ifdef CONFIG_X86_32
+	unsigned int bsp = boot_cpu_data.cpu_index;
+	struct ucode_cpu_info *uci = ucode_cpu_info + bsp;
+
+	if (!uci->cpu_sig.sig)
+		smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1);
+
+	/*
+	 * Take into account the fact that the ramdisk might get relocated
+	 * and therefore we need to recompute the container's position in
+	 * virtual memory space.
+	 */
+	container = (u8 *)(__va((u32)relocated_ramdisk) +
+			   ((u32)container - boot_params.hdr.ramdisk_image));
+#endif
+	if (ucode_new_rev)
+		pr_info("microcode: updated early to new patch_level=0x%08x\n",
+			ucode_new_rev);
+
+	if (!container)
+		return -EINVAL;
+
+	eax   = cpuid_eax(0x00000001);
+	eax   = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
+
+	ret = load_microcode_amd(eax, container, container_size);
+	if (ret != UCODE_OK)
+		return -EINVAL;
+
+	/*
+	 * This will be freed any msec now, stash patches for the current
+	 * family and switch to patch cache for cpu hotplug, etc later.
+	 */
+	container = NULL;
+	container_size = 0;
+
+	return 0;
+}
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
new file mode 100644
index 000000000000..15c987698b0f
--- /dev/null
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -0,0 +1,645 @@
+/*
+ *	Intel CPU Microcode Update Driver for Linux
+ *
+ *	Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+ *		      2006	Shaohua Li <shaohua.li@intel.com>
+ *
+ *	This driver allows to upgrade microcode on Intel processors
+ *	belonging to IA-32 family - PentiumPro, Pentium II,
+ *	Pentium III, Xeon, Pentium 4, etc.
+ *
+ *	Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture
+ *	Software Developer's Manual
+ *	Order Number 253668 or free download from:
+ *
+ *	http://developer.intel.com/Assets/PDF/manual/253668.pdf	
+ *
+ *	For more information, go to http://www.urbanmyth.org/microcode
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ *	1.0	16 Feb 2000, Tigran Aivazian <tigran@sco.com>
+ *		Initial release.
+ *	1.01	18 Feb 2000, Tigran Aivazian <tigran@sco.com>
+ *		Added read() support + cleanups.
+ *	1.02	21 Feb 2000, Tigran Aivazian <tigran@sco.com>
+ *		Added 'device trimming' support. open(O_WRONLY) zeroes
+ *		and frees the saved copy of applied microcode.
+ *	1.03	29 Feb 2000, Tigran Aivazian <tigran@sco.com>
+ *		Made to use devfs (/dev/cpu/microcode) + cleanups.
+ *	1.04	06 Jun 2000, Simon Trimmer <simon@veritas.com>
+ *		Added misc device support (now uses both devfs and misc).
+ *		Added MICROCODE_IOCFREE ioctl to clear memory.
+ *	1.05	09 Jun 2000, Simon Trimmer <simon@veritas.com>
+ *		Messages for error cases (non Intel & no suitable microcode).
+ *	1.06	03 Aug 2000, Tigran Aivazian <tigran@veritas.com>
+ *		Removed ->release(). Removed exclusive open and status bitmap.
+ *		Added microcode_rwsem to serialize read()/write()/ioctl().
+ *		Removed global kernel lock usage.
+ *	1.07	07 Sep 2000, Tigran Aivazian <tigran@veritas.com>
+ *		Write 0 to 0x8B msr and then cpuid before reading revision,
+ *		so that it works even if there were no update done by the
+ *		BIOS. Otherwise, reading from 0x8B gives junk (which happened
+ *		to be 0 on my machine which is why it worked even when I
+ *		disabled update by the BIOS)
+ *		Thanks to Eric W. Biederman <ebiederman@lnxi.com> for the fix.
+ *	1.08	11 Dec 2000, Richard Schaal <richard.schaal@intel.com> and
+ *			     Tigran Aivazian <tigran@veritas.com>
+ *		Intel Pentium 4 processor support and bugfixes.
+ *	1.09	30 Oct 2001, Tigran Aivazian <tigran@veritas.com>
+ *		Bugfix for HT (Hyper-Threading) enabled processors
+ *		whereby processor resources are shared by all logical processors
+ *		in a single CPU package.
+ *	1.10	28 Feb 2002 Asit K Mallick <asit.k.mallick@intel.com> and
+ *		Tigran Aivazian <tigran@veritas.com>,
+ *		Serialize updates as required on HT processors due to
+ *		speculative nature of implementation.
+ *	1.11	22 Mar 2002 Tigran Aivazian <tigran@veritas.com>
+ *		Fix the panic when writing zero-length microcode chunk.
+ *	1.12	29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>,
+ *		Jun Nakajima <jun.nakajima@intel.com>
+ *		Support for the microcode updates in the new format.
+ *	1.13	10 Oct 2003 Tigran Aivazian <tigran@veritas.com>
+ *		Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl
+ *		because we no longer hold a copy of applied microcode
+ *		in kernel memory.
+ *	1.14	25 Jun 2004 Tigran Aivazian <tigran@veritas.com>
+ *		Fix sigmatch() macro to handle old CPUs with pf == 0.
+ *		Thanks to Stuart Swales for pointing out this bug.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/capability.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/cpu.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/syscore_ops.h>
+
+#include <asm/microcode.h>
+#include <asm/processor.h>
+#include <asm/cpu_device_id.h>
+#include <asm/perf_event.h>
+
+MODULE_DESCRIPTION("Microcode Update Driver");
+MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
+MODULE_LICENSE("GPL");
+
+#define MICROCODE_VERSION	"2.00"
+
+static struct microcode_ops	*microcode_ops;
+
+/*
+ * Synchronization.
+ *
+ * All non cpu-hotplug-callback call sites use:
+ *
+ * - microcode_mutex to synchronize with each other;
+ * - get/put_online_cpus() to synchronize with
+ *   the cpu-hotplug-callback call sites.
+ *
+ * We guarantee that only a single cpu is being
+ * updated at any particular moment of time.
+ */
+static DEFINE_MUTEX(microcode_mutex);
+
+struct ucode_cpu_info		ucode_cpu_info[NR_CPUS];
+EXPORT_SYMBOL_GPL(ucode_cpu_info);
+
+/*
+ * Operations that are run on a target cpu:
+ */
+
+struct cpu_info_ctx {
+	struct cpu_signature	*cpu_sig;
+	int			err;
+};
+
+static void collect_cpu_info_local(void *arg)
+{
+	struct cpu_info_ctx *ctx = arg;
+
+	ctx->err = microcode_ops->collect_cpu_info(smp_processor_id(),
+						   ctx->cpu_sig);
+}
+
+static int collect_cpu_info_on_target(int cpu, struct cpu_signature *cpu_sig)
+{
+	struct cpu_info_ctx ctx = { .cpu_sig = cpu_sig, .err = 0 };
+	int ret;
+
+	ret = smp_call_function_single(cpu, collect_cpu_info_local, &ctx, 1);
+	if (!ret)
+		ret = ctx.err;
+
+	return ret;
+}
+
+static int collect_cpu_info(int cpu)
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	int ret;
+
+	memset(uci, 0, sizeof(*uci));
+
+	ret = collect_cpu_info_on_target(cpu, &uci->cpu_sig);
+	if (!ret)
+		uci->valid = 1;
+
+	return ret;
+}
+
+struct apply_microcode_ctx {
+	int err;
+};
+
+static void apply_microcode_local(void *arg)
+{
+	struct apply_microcode_ctx *ctx = arg;
+
+	ctx->err = microcode_ops->apply_microcode(smp_processor_id());
+}
+
+static int apply_microcode_on_target(int cpu)
+{
+	struct apply_microcode_ctx ctx = { .err = 0 };
+	int ret;
+
+	ret = smp_call_function_single(cpu, apply_microcode_local, &ctx, 1);
+	if (!ret)
+		ret = ctx.err;
+
+	return ret;
+}
+
+#ifdef CONFIG_MICROCODE_OLD_INTERFACE
+static int do_microcode_update(const void __user *buf, size_t size)
+{
+	int error = 0;
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+		enum ucode_state ustate;
+
+		if (!uci->valid)
+			continue;
+
+		ustate = microcode_ops->request_microcode_user(cpu, buf, size);
+		if (ustate == UCODE_ERROR) {
+			error = -1;
+			break;
+		} else if (ustate == UCODE_OK)
+			apply_microcode_on_target(cpu);
+	}
+
+	return error;
+}
+
+static int microcode_open(struct inode *inode, struct file *file)
+{
+	return capable(CAP_SYS_RAWIO) ? nonseekable_open(inode, file) : -EPERM;
+}
+
+static ssize_t microcode_write(struct file *file, const char __user *buf,
+			       size_t len, loff_t *ppos)
+{
+	ssize_t ret = -EINVAL;
+
+	if ((len >> PAGE_SHIFT) > totalram_pages) {
+		pr_err("too much data (max %ld pages)\n", totalram_pages);
+		return ret;
+	}
+
+	get_online_cpus();
+	mutex_lock(&microcode_mutex);
+
+	if (do_microcode_update(buf, len) == 0)
+		ret = (ssize_t)len;
+
+	if (ret > 0)
+		perf_check_microcode();
+
+	mutex_unlock(&microcode_mutex);
+	put_online_cpus();
+
+	return ret;
+}
+
+static const struct file_operations microcode_fops = {
+	.owner			= THIS_MODULE,
+	.write			= microcode_write,
+	.open			= microcode_open,
+	.llseek		= no_llseek,
+};
+
+static struct miscdevice microcode_dev = {
+	.minor			= MICROCODE_MINOR,
+	.name			= "microcode",
+	.nodename		= "cpu/microcode",
+	.fops			= &microcode_fops,
+};
+
+static int __init microcode_dev_init(void)
+{
+	int error;
+
+	error = misc_register(&microcode_dev);
+	if (error) {
+		pr_err("can't misc_register on minor=%d\n", MICROCODE_MINOR);
+		return error;
+	}
+
+	return 0;
+}
+
+static void __exit microcode_dev_exit(void)
+{
+	misc_deregister(&microcode_dev);
+}
+
+MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
+MODULE_ALIAS("devname:cpu/microcode");
+#else
+#define microcode_dev_init()	0
+#define microcode_dev_exit()	do { } while (0)
+#endif
+
+/* fake device for request_firmware */
+static struct platform_device	*microcode_pdev;
+
+static int reload_for_cpu(int cpu)
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	enum ucode_state ustate;
+	int err = 0;
+
+	if (!uci->valid)
+		return err;
+
+	ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true);
+	if (ustate == UCODE_OK)
+		apply_microcode_on_target(cpu);
+	else
+		if (ustate == UCODE_ERROR)
+			err = -EINVAL;
+	return err;
+}
+
+static ssize_t reload_store(struct device *dev,
+			    struct device_attribute *attr,
+			    const char *buf, size_t size)
+{
+	unsigned long val;
+	int cpu;
+	ssize_t ret = 0, tmp_ret;
+
+	ret = kstrtoul(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	if (val != 1)
+		return size;
+
+	get_online_cpus();
+	mutex_lock(&microcode_mutex);
+	for_each_online_cpu(cpu) {
+		tmp_ret = reload_for_cpu(cpu);
+		if (tmp_ret != 0)
+			pr_warn("Error reloading microcode on CPU %d\n", cpu);
+
+		/* save retval of the first encountered reload error */
+		if (!ret)
+			ret = tmp_ret;
+	}
+	if (!ret)
+		perf_check_microcode();
+	mutex_unlock(&microcode_mutex);
+	put_online_cpus();
+
+	if (!ret)
+		ret = size;
+
+	return ret;
+}
+
+static ssize_t version_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
+
+	return sprintf(buf, "0x%x\n", uci->cpu_sig.rev);
+}
+
+static ssize_t pf_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
+
+	return sprintf(buf, "0x%x\n", uci->cpu_sig.pf);
+}
+
+static DEVICE_ATTR(reload, 0200, NULL, reload_store);
+static DEVICE_ATTR(version, 0400, version_show, NULL);
+static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL);
+
+static struct attribute *mc_default_attrs[] = {
+	&dev_attr_version.attr,
+	&dev_attr_processor_flags.attr,
+	NULL
+};
+
+static struct attribute_group mc_attr_group = {
+	.attrs			= mc_default_attrs,
+	.name			= "microcode",
+};
+
+static void microcode_fini_cpu(int cpu)
+{
+	microcode_ops->microcode_fini_cpu(cpu);
+}
+
+static enum ucode_state microcode_resume_cpu(int cpu)
+{
+	pr_debug("CPU%d updated upon resume\n", cpu);
+
+	if (apply_microcode_on_target(cpu))
+		return UCODE_ERROR;
+
+	return UCODE_OK;
+}
+
+static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw)
+{
+	enum ucode_state ustate;
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+	if (uci && uci->valid)
+		return UCODE_OK;
+
+	if (collect_cpu_info(cpu))
+		return UCODE_ERROR;
+
+	/* --dimm. Trigger a delayed update? */
+	if (system_state != SYSTEM_RUNNING)
+		return UCODE_NFOUND;
+
+	ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev,
+						     refresh_fw);
+
+	if (ustate == UCODE_OK) {
+		pr_debug("CPU%d updated upon init\n", cpu);
+		apply_microcode_on_target(cpu);
+	}
+
+	return ustate;
+}
+
+static enum ucode_state microcode_update_cpu(int cpu)
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+	if (uci->valid)
+		return microcode_resume_cpu(cpu);
+
+	return microcode_init_cpu(cpu, false);
+}
+
+static int mc_device_add(struct device *dev, struct subsys_interface *sif)
+{
+	int err, cpu = dev->id;
+
+	if (!cpu_online(cpu))
+		return 0;
+
+	pr_debug("CPU%d added\n", cpu);
+
+	err = sysfs_create_group(&dev->kobj, &mc_attr_group);
+	if (err)
+		return err;
+
+	if (microcode_init_cpu(cpu, true) == UCODE_ERROR)
+		return -EINVAL;
+
+	return err;
+}
+
+static int mc_device_remove(struct device *dev, struct subsys_interface *sif)
+{
+	int cpu = dev->id;
+
+	if (!cpu_online(cpu))
+		return 0;
+
+	pr_debug("CPU%d removed\n", cpu);
+	microcode_fini_cpu(cpu);
+	sysfs_remove_group(&dev->kobj, &mc_attr_group);
+	return 0;
+}
+
+static struct subsys_interface mc_cpu_interface = {
+	.name			= "microcode",
+	.subsys			= &cpu_subsys,
+	.add_dev		= mc_device_add,
+	.remove_dev		= mc_device_remove,
+};
+
+/**
+ * mc_bp_resume - Update boot CPU microcode during resume.
+ */
+static void mc_bp_resume(void)
+{
+	int cpu = smp_processor_id();
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+	if (uci->valid && uci->mc)
+		microcode_ops->apply_microcode(cpu);
+}
+
+static struct syscore_ops mc_syscore_ops = {
+	.resume			= mc_bp_resume,
+};
+
+static int
+mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+	struct device *dev;
+
+	dev = get_cpu_device(cpu);
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_ONLINE:
+		microcode_update_cpu(cpu);
+		pr_debug("CPU%d added\n", cpu);
+		/*
+		 * "break" is missing on purpose here because we want to fall
+		 * through in order to create the sysfs group.
+		 */
+
+	case CPU_DOWN_FAILED:
+		if (sysfs_create_group(&dev->kobj, &mc_attr_group))
+			pr_err("Failed to create group for CPU%d\n", cpu);
+		break;
+
+	case CPU_DOWN_PREPARE:
+		/* Suspend is in progress, only remove the interface */
+		sysfs_remove_group(&dev->kobj, &mc_attr_group);
+		pr_debug("CPU%d removed\n", cpu);
+		break;
+
+	/*
+	 * case CPU_DEAD:
+	 *
+	 * When a CPU goes offline, don't free up or invalidate the copy of
+	 * the microcode in kernel memory, so that we can reuse it when the
+	 * CPU comes back online without unnecessarily requesting the userspace
+	 * for it again.
+	 */
+	}
+
+	/* The CPU refused to come up during a system resume */
+	if (action == CPU_UP_CANCELED_FROZEN)
+		microcode_fini_cpu(cpu);
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __refdata mc_cpu_notifier = {
+	.notifier_call	= mc_cpu_callback,
+};
+
+#ifdef MODULE
+/* Autoload on Intel and AMD systems */
+static const struct x86_cpu_id __initconst microcode_id[] = {
+#ifdef CONFIG_MICROCODE_INTEL
+	{ X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, },
+#endif
+#ifdef CONFIG_MICROCODE_AMD
+	{ X86_VENDOR_AMD, X86_FAMILY_ANY, X86_MODEL_ANY, },
+#endif
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, microcode_id);
+#endif
+
+static struct attribute *cpu_root_microcode_attrs[] = {
+	&dev_attr_reload.attr,
+	NULL
+};
+
+static struct attribute_group cpu_root_microcode_group = {
+	.name  = "microcode",
+	.attrs = cpu_root_microcode_attrs,
+};
+
+static int __init microcode_init(void)
+{
+	struct cpuinfo_x86 *c = &cpu_data(0);
+	int error;
+
+	if (c->x86_vendor == X86_VENDOR_INTEL)
+		microcode_ops = init_intel_microcode();
+	else if (c->x86_vendor == X86_VENDOR_AMD)
+		microcode_ops = init_amd_microcode();
+	else
+		pr_err("no support for this CPU vendor\n");
+
+	if (!microcode_ops)
+		return -ENODEV;
+
+	microcode_pdev = platform_device_register_simple("microcode", -1,
+							 NULL, 0);
+	if (IS_ERR(microcode_pdev))
+		return PTR_ERR(microcode_pdev);
+
+	get_online_cpus();
+	mutex_lock(&microcode_mutex);
+
+	error = subsys_interface_register(&mc_cpu_interface);
+	if (!error)
+		perf_check_microcode();
+	mutex_unlock(&microcode_mutex);
+	put_online_cpus();
+
+	if (error)
+		goto out_pdev;
+
+	error = sysfs_create_group(&cpu_subsys.dev_root->kobj,
+				   &cpu_root_microcode_group);
+
+	if (error) {
+		pr_err("Error creating microcode group!\n");
+		goto out_driver;
+	}
+
+	error = microcode_dev_init();
+	if (error)
+		goto out_ucode_group;
+
+	register_syscore_ops(&mc_syscore_ops);
+	register_hotcpu_notifier(&mc_cpu_notifier);
+
+	pr_info("Microcode Update Driver: v" MICROCODE_VERSION
+		" <tigran@aivazian.fsnet.co.uk>, Peter Oruba\n");
+
+	return 0;
+
+ out_ucode_group:
+	sysfs_remove_group(&cpu_subsys.dev_root->kobj,
+			   &cpu_root_microcode_group);
+
+ out_driver:
+	get_online_cpus();
+	mutex_lock(&microcode_mutex);
+
+	subsys_interface_unregister(&mc_cpu_interface);
+
+	mutex_unlock(&microcode_mutex);
+	put_online_cpus();
+
+ out_pdev:
+	platform_device_unregister(microcode_pdev);
+	return error;
+
+}
+module_init(microcode_init);
+
+static void __exit microcode_exit(void)
+{
+	struct cpuinfo_x86 *c = &cpu_data(0);
+
+	microcode_dev_exit();
+
+	unregister_hotcpu_notifier(&mc_cpu_notifier);
+	unregister_syscore_ops(&mc_syscore_ops);
+
+	sysfs_remove_group(&cpu_subsys.dev_root->kobj,
+			   &cpu_root_microcode_group);
+
+	get_online_cpus();
+	mutex_lock(&microcode_mutex);
+
+	subsys_interface_unregister(&mc_cpu_interface);
+
+	mutex_unlock(&microcode_mutex);
+	put_online_cpus();
+
+	platform_device_unregister(microcode_pdev);
+
+	microcode_ops = NULL;
+
+	if (c->x86_vendor == X86_VENDOR_AMD)
+		exit_amd_microcode();
+
+	pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
+}
+module_exit(microcode_exit);
diff --git a/arch/x86/kernel/cpu/microcode/core_early.c b/arch/x86/kernel/cpu/microcode/core_early.c
new file mode 100644
index 000000000000..be7f8514f577
--- /dev/null
+++ b/arch/x86/kernel/cpu/microcode/core_early.c
@@ -0,0 +1,141 @@
+/*
+ *	X86 CPU microcode early update for Linux
+ *
+ *	Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
+ *			   H Peter Anvin" <hpa@zytor.com>
+ *
+ *	This driver allows to early upgrade microcode on Intel processors
+ *	belonging to IA-32 family - PentiumPro, Pentium II,
+ *	Pentium III, Xeon, Pentium 4, etc.
+ *
+ *	Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture
+ *	Software Developer's Manual.
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+#include <linux/module.h>
+#include <asm/microcode_intel.h>
+#include <asm/microcode_amd.h>
+#include <asm/processor.h>
+
+#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))
+#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u')
+#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I')
+#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l')
+#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h')
+#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i')
+#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D')
+
+#define CPUID_IS(a, b, c, ebx, ecx, edx)	\
+		(!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c))))
+
+/*
+ * In early loading microcode phase on BSP, boot_cpu_data is not set up yet.
+ * x86_vendor() gets vendor id for BSP.
+ *
+ * In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify
+ * coding, we still use x86_vendor() to get vendor id for AP.
+ *
+ * x86_vendor() gets vendor information directly through cpuid.
+ */
+static int x86_vendor(void)
+{
+	u32 eax = 0x00000000;
+	u32 ebx, ecx = 0, edx;
+
+	native_cpuid(&eax, &ebx, &ecx, &edx);
+
+	if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx))
+		return X86_VENDOR_INTEL;
+
+	if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx))
+		return X86_VENDOR_AMD;
+
+	return X86_VENDOR_UNKNOWN;
+}
+
+static int x86_family(void)
+{
+	u32 eax = 0x00000001;
+	u32 ebx, ecx = 0, edx;
+	int x86;
+
+	native_cpuid(&eax, &ebx, &ecx, &edx);
+
+	x86 = (eax >> 8) & 0xf;
+	if (x86 == 15)
+		x86 += (eax >> 20) & 0xff;
+
+	return x86;
+}
+
+void __init load_ucode_bsp(void)
+{
+	int vendor, x86;
+
+	if (!have_cpuid_p())
+		return;
+
+	vendor = x86_vendor();
+	x86 = x86_family();
+
+	switch (vendor) {
+	case X86_VENDOR_INTEL:
+		if (x86 >= 6)
+			load_ucode_intel_bsp();
+		break;
+	case X86_VENDOR_AMD:
+		if (x86 >= 0x10)
+			load_ucode_amd_bsp();
+		break;
+	default:
+		break;
+	}
+}
+
+void load_ucode_ap(void)
+{
+	int vendor, x86;
+
+	if (!have_cpuid_p())
+		return;
+
+	vendor = x86_vendor();
+	x86 = x86_family();
+
+	switch (vendor) {
+	case X86_VENDOR_INTEL:
+		if (x86 >= 6)
+			load_ucode_intel_ap();
+		break;
+	case X86_VENDOR_AMD:
+		if (x86 >= 0x10)
+			load_ucode_amd_ap();
+		break;
+	default:
+		break;
+	}
+}
+
+int __init save_microcode_in_initrd(void)
+{
+	struct cpuinfo_x86 *c = &boot_cpu_data;
+
+	switch (c->x86_vendor) {
+	case X86_VENDOR_INTEL:
+		if (c->x86 >= 6)
+			save_microcode_in_initrd_intel();
+		break;
+	case X86_VENDOR_AMD:
+		if (c->x86 >= 0x10)
+			save_microcode_in_initrd_amd();
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
new file mode 100644
index 000000000000..5fb2cebf556b
--- /dev/null
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -0,0 +1,333 @@
+/*
+ *	Intel CPU Microcode Update Driver for Linux
+ *
+ *	Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+ *		      2006	Shaohua Li <shaohua.li@intel.com>
+ *
+ *	This driver allows to upgrade microcode on Intel processors
+ *	belonging to IA-32 family - PentiumPro, Pentium II,
+ *	Pentium III, Xeon, Pentium 4, etc.
+ *
+ *	Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture
+ *	Software Developer's Manual
+ *	Order Number 253668 or free download from:
+ *
+ *	http://developer.intel.com/Assets/PDF/manual/253668.pdf	
+ *
+ *	For more information, go to http://www.urbanmyth.org/microcode
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ *	1.0	16 Feb 2000, Tigran Aivazian <tigran@sco.com>
+ *		Initial release.
+ *	1.01	18 Feb 2000, Tigran Aivazian <tigran@sco.com>
+ *		Added read() support + cleanups.
+ *	1.02	21 Feb 2000, Tigran Aivazian <tigran@sco.com>
+ *		Added 'device trimming' support. open(O_WRONLY) zeroes
+ *		and frees the saved copy of applied microcode.
+ *	1.03	29 Feb 2000, Tigran Aivazian <tigran@sco.com>
+ *		Made to use devfs (/dev/cpu/microcode) + cleanups.
+ *	1.04	06 Jun 2000, Simon Trimmer <simon@veritas.com>
+ *		Added misc device support (now uses both devfs and misc).
+ *		Added MICROCODE_IOCFREE ioctl to clear memory.
+ *	1.05	09 Jun 2000, Simon Trimmer <simon@veritas.com>
+ *		Messages for error cases (non Intel & no suitable microcode).
+ *	1.06	03 Aug 2000, Tigran Aivazian <tigran@veritas.com>
+ *		Removed ->release(). Removed exclusive open and status bitmap.
+ *		Added microcode_rwsem to serialize read()/write()/ioctl().
+ *		Removed global kernel lock usage.
+ *	1.07	07 Sep 2000, Tigran Aivazian <tigran@veritas.com>
+ *		Write 0 to 0x8B msr and then cpuid before reading revision,
+ *		so that it works even if there were no update done by the
+ *		BIOS. Otherwise, reading from 0x8B gives junk (which happened
+ *		to be 0 on my machine which is why it worked even when I
+ *		disabled update by the BIOS)
+ *		Thanks to Eric W. Biederman <ebiederman@lnxi.com> for the fix.
+ *	1.08	11 Dec 2000, Richard Schaal <richard.schaal@intel.com> and
+ *			     Tigran Aivazian <tigran@veritas.com>
+ *		Intel Pentium 4 processor support and bugfixes.
+ *	1.09	30 Oct 2001, Tigran Aivazian <tigran@veritas.com>
+ *		Bugfix for HT (Hyper-Threading) enabled processors
+ *		whereby processor resources are shared by all logical processors
+ *		in a single CPU package.
+ *	1.10	28 Feb 2002 Asit K Mallick <asit.k.mallick@intel.com> and
+ *		Tigran Aivazian <tigran@veritas.com>,
+ *		Serialize updates as required on HT processors due to
+ *		speculative nature of implementation.
+ *	1.11	22 Mar 2002 Tigran Aivazian <tigran@veritas.com>
+ *		Fix the panic when writing zero-length microcode chunk.
+ *	1.12	29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>,
+ *		Jun Nakajima <jun.nakajima@intel.com>
+ *		Support for the microcode updates in the new format.
+ *	1.13	10 Oct 2003 Tigran Aivazian <tigran@veritas.com>
+ *		Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl
+ *		because we no longer hold a copy of applied microcode
+ *		in kernel memory.
+ *	1.14	25 Jun 2004 Tigran Aivazian <tigran@veritas.com>
+ *		Fix sigmatch() macro to handle old CPUs with pf == 0.
+ *		Thanks to Stuart Swales for pointing out this bug.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/firmware.h>
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+
+#include <asm/microcode_intel.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+
+MODULE_DESCRIPTION("Microcode Update Driver");
+MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
+MODULE_LICENSE("GPL");
+
+static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
+{
+	struct cpuinfo_x86 *c = &cpu_data(cpu_num);
+	unsigned int val[2];
+
+	memset(csig, 0, sizeof(*csig));
+
+	csig->sig = cpuid_eax(0x00000001);
+
+	if ((c->x86_model >= 5) || (c->x86 > 6)) {
+		/* get processor flags from MSR 0x17 */
+		rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
+		csig->pf = 1 << ((val[1] >> 18) & 7);
+	}
+
+	csig->rev = c->microcode;
+	pr_info("CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n",
+		cpu_num, csig->sig, csig->pf, csig->rev);
+
+	return 0;
+}
+
+/*
+ * return 0 - no update found
+ * return 1 - found update
+ */
+static int get_matching_mc(struct microcode_intel *mc_intel, int cpu)
+{
+	struct cpu_signature cpu_sig;
+	unsigned int csig, cpf, crev;
+
+	collect_cpu_info(cpu, &cpu_sig);
+
+	csig = cpu_sig.sig;
+	cpf = cpu_sig.pf;
+	crev = cpu_sig.rev;
+
+	return get_matching_microcode(csig, cpf, mc_intel, crev);
+}
+
+int apply_microcode(int cpu)
+{
+	struct microcode_intel *mc_intel;
+	struct ucode_cpu_info *uci;
+	unsigned int val[2];
+	int cpu_num = raw_smp_processor_id();
+	struct cpuinfo_x86 *c = &cpu_data(cpu_num);
+
+	uci = ucode_cpu_info + cpu;
+	mc_intel = uci->mc;
+
+	/* We should bind the task to the CPU */
+	BUG_ON(cpu_num != cpu);
+
+	if (mc_intel == NULL)
+		return 0;
+
+	/*
+	 * Microcode on this CPU could be updated earlier. Only apply the
+	 * microcode patch in mc_intel when it is newer than the one on this
+	 * CPU.
+	 */
+	if (get_matching_mc(mc_intel, cpu) == 0)
+		return 0;
+
+	/* write microcode via MSR 0x79 */
+	wrmsr(MSR_IA32_UCODE_WRITE,
+	      (unsigned long) mc_intel->bits,
+	      (unsigned long) mc_intel->bits >> 16 >> 16);
+	wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+
+	/* As documented in the SDM: Do a CPUID 1 here */
+	sync_core();
+
+	/* get the current revision from MSR 0x8B */
+	rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
+
+	if (val[1] != mc_intel->hdr.rev) {
+		pr_err("CPU%d update to revision 0x%x failed\n",
+		       cpu_num, mc_intel->hdr.rev);
+		return -1;
+	}
+	pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x\n",
+		cpu_num, val[1],
+		mc_intel->hdr.date & 0xffff,
+		mc_intel->hdr.date >> 24,
+		(mc_intel->hdr.date >> 16) & 0xff);
+
+	uci->cpu_sig.rev = val[1];
+	c->microcode = val[1];
+
+	return 0;
+}
+
+static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
+				int (*get_ucode_data)(void *, const void *, size_t))
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	u8 *ucode_ptr = data, *new_mc = NULL, *mc = NULL;
+	int new_rev = uci->cpu_sig.rev;
+	unsigned int leftover = size;
+	enum ucode_state state = UCODE_OK;
+	unsigned int curr_mc_size = 0;
+	unsigned int csig, cpf;
+
+	while (leftover) {
+		struct microcode_header_intel mc_header;
+		unsigned int mc_size;
+
+		if (get_ucode_data(&mc_header, ucode_ptr, sizeof(mc_header)))
+			break;
+
+		mc_size = get_totalsize(&mc_header);
+		if (!mc_size || mc_size > leftover) {
+			pr_err("error! Bad data in microcode data file\n");
+			break;
+		}
+
+		/* For performance reasons, reuse mc area when possible */
+		if (!mc || mc_size > curr_mc_size) {
+			vfree(mc);
+			mc = vmalloc(mc_size);
+			if (!mc)
+				break;
+			curr_mc_size = mc_size;
+		}
+
+		if (get_ucode_data(mc, ucode_ptr, mc_size) ||
+		    microcode_sanity_check(mc, 1) < 0) {
+			break;
+		}
+
+		csig = uci->cpu_sig.sig;
+		cpf = uci->cpu_sig.pf;
+		if (get_matching_microcode(csig, cpf, mc, new_rev)) {
+			vfree(new_mc);
+			new_rev = mc_header.rev;
+			new_mc  = mc;
+			mc = NULL;	/* trigger new vmalloc */
+		}
+
+		ucode_ptr += mc_size;
+		leftover  -= mc_size;
+	}
+
+	vfree(mc);
+
+	if (leftover) {
+		vfree(new_mc);
+		state = UCODE_ERROR;
+		goto out;
+	}
+
+	if (!new_mc) {
+		state = UCODE_NFOUND;
+		goto out;
+	}
+
+	vfree(uci->mc);
+	uci->mc = (struct microcode_intel *)new_mc;
+
+	/*
+	 * If early loading microcode is supported, save this mc into
+	 * permanent memory. So it will be loaded early when a CPU is hot added
+	 * or resumes.
+	 */
+	save_mc_for_early(new_mc);
+
+	pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
+		 cpu, new_rev, uci->cpu_sig.rev);
+out:
+	return state;
+}
+
+static int get_ucode_fw(void *to, const void *from, size_t n)
+{
+	memcpy(to, from, n);
+	return 0;
+}
+
+static enum ucode_state request_microcode_fw(int cpu, struct device *device,
+					     bool refresh_fw)
+{
+	char name[30];
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	const struct firmware *firmware;
+	enum ucode_state ret;
+
+	sprintf(name, "intel-ucode/%02x-%02x-%02x",
+		c->x86, c->x86_model, c->x86_mask);
+
+	if (request_firmware(&firmware, name, device)) {
+		pr_debug("data file %s load failed\n", name);
+		return UCODE_NFOUND;
+	}
+
+	ret = generic_load_microcode(cpu, (void *)firmware->data,
+				     firmware->size, &get_ucode_fw);
+
+	release_firmware(firmware);
+
+	return ret;
+}
+
+static int get_ucode_user(void *to, const void *from, size_t n)
+{
+	return copy_from_user(to, from, n);
+}
+
+static enum ucode_state
+request_microcode_user(int cpu, const void __user *buf, size_t size)
+{
+	return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user);
+}
+
+static void microcode_fini_cpu(int cpu)
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+	vfree(uci->mc);
+	uci->mc = NULL;
+}
+
+static struct microcode_ops microcode_intel_ops = {
+	.request_microcode_user		  = request_microcode_user,
+	.request_microcode_fw             = request_microcode_fw,
+	.collect_cpu_info                 = collect_cpu_info,
+	.apply_microcode                  = apply_microcode,
+	.microcode_fini_cpu               = microcode_fini_cpu,
+};
+
+struct microcode_ops * __init init_intel_microcode(void)
+{
+	struct cpuinfo_x86 *c = &cpu_data(0);
+
+	if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
+	    cpu_has(c, X86_FEATURE_IA64)) {
+		pr_err("Intel CPU family 0x%x not supported\n", c->x86);
+		return NULL;
+	}
+
+	return &microcode_intel_ops;
+}
+
diff --git a/arch/x86/kernel/cpu/microcode/intel_early.c b/arch/x86/kernel/cpu/microcode/intel_early.c
new file mode 100644
index 000000000000..18f739129e72
--- /dev/null
+++ b/arch/x86/kernel/cpu/microcode/intel_early.c
@@ -0,0 +1,787 @@
+/*
+ *	Intel CPU microcode early update for Linux
+ *
+ *	Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
+ *			   H Peter Anvin" <hpa@zytor.com>
+ *
+ *	This allows to early upgrade microcode on Intel processors
+ *	belonging to IA-32 family - PentiumPro, Pentium II,
+ *	Pentium III, Xeon, Pentium 4, etc.
+ *
+ *	Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture
+ *	Software Developer's Manual.
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/earlycpio.h>
+#include <linux/initrd.h>
+#include <linux/cpu.h>
+#include <asm/msr.h>
+#include <asm/microcode_intel.h>
+#include <asm/processor.h>
+#include <asm/tlbflush.h>
+#include <asm/setup.h>
+
+unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT];
+struct mc_saved_data {
+	unsigned int mc_saved_count;
+	struct microcode_intel **mc_saved;
+} mc_saved_data;
+
+static enum ucode_state
+generic_load_microcode_early(struct microcode_intel **mc_saved_p,
+			     unsigned int mc_saved_count,
+			     struct ucode_cpu_info *uci)
+{
+	struct microcode_intel *ucode_ptr, *new_mc = NULL;
+	int new_rev = uci->cpu_sig.rev;
+	enum ucode_state state = UCODE_OK;
+	unsigned int mc_size;
+	struct microcode_header_intel *mc_header;
+	unsigned int csig = uci->cpu_sig.sig;
+	unsigned int cpf = uci->cpu_sig.pf;
+	int i;
+
+	for (i = 0; i < mc_saved_count; i++) {
+		ucode_ptr = mc_saved_p[i];
+
+		mc_header = (struct microcode_header_intel *)ucode_ptr;
+		mc_size = get_totalsize(mc_header);
+		if (get_matching_microcode(csig, cpf, ucode_ptr, new_rev)) {
+			new_rev = mc_header->rev;
+			new_mc  = ucode_ptr;
+		}
+	}
+
+	if (!new_mc) {
+		state = UCODE_NFOUND;
+		goto out;
+	}
+
+	uci->mc = (struct microcode_intel *)new_mc;
+out:
+	return state;
+}
+
+static void
+microcode_pointer(struct microcode_intel **mc_saved,
+		  unsigned long *mc_saved_in_initrd,
+		  unsigned long initrd_start, int mc_saved_count)
+{
+	int i;
+
+	for (i = 0; i < mc_saved_count; i++)
+		mc_saved[i] = (struct microcode_intel *)
+			      (mc_saved_in_initrd[i] + initrd_start);
+}
+
+#ifdef CONFIG_X86_32
+static void
+microcode_phys(struct microcode_intel **mc_saved_tmp,
+	       struct mc_saved_data *mc_saved_data)
+{
+	int i;
+	struct microcode_intel ***mc_saved;
+
+	mc_saved = (struct microcode_intel ***)
+		   __pa_nodebug(&mc_saved_data->mc_saved);
+	for (i = 0; i < mc_saved_data->mc_saved_count; i++) {
+		struct microcode_intel *p;
+
+		p = *(struct microcode_intel **)
+			__pa_nodebug(mc_saved_data->mc_saved + i);
+		mc_saved_tmp[i] = (struct microcode_intel *)__pa_nodebug(p);
+	}
+}
+#endif
+
+static enum ucode_state
+load_microcode(struct mc_saved_data *mc_saved_data,
+	       unsigned long *mc_saved_in_initrd,
+	       unsigned long initrd_start,
+	       struct ucode_cpu_info *uci)
+{
+	struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
+	unsigned int count = mc_saved_data->mc_saved_count;
+
+	if (!mc_saved_data->mc_saved) {
+		microcode_pointer(mc_saved_tmp, mc_saved_in_initrd,
+				  initrd_start, count);
+
+		return generic_load_microcode_early(mc_saved_tmp, count, uci);
+	} else {
+#ifdef CONFIG_X86_32
+		microcode_phys(mc_saved_tmp, mc_saved_data);
+		return generic_load_microcode_early(mc_saved_tmp, count, uci);
+#else
+		return generic_load_microcode_early(mc_saved_data->mc_saved,
+						    count, uci);
+#endif
+	}
+}
+
+static u8 get_x86_family(unsigned long sig)
+{
+	u8 x86;
+
+	x86 = (sig >> 8) & 0xf;
+
+	if (x86 == 0xf)
+		x86 += (sig >> 20) & 0xff;
+
+	return x86;
+}
+
+static u8 get_x86_model(unsigned long sig)
+{
+	u8 x86, x86_model;
+
+	x86 = get_x86_family(sig);
+	x86_model = (sig >> 4) & 0xf;
+
+	if (x86 == 0x6 || x86 == 0xf)
+		x86_model += ((sig >> 16) & 0xf) << 4;
+
+	return x86_model;
+}
+
+/*
+ * Given CPU signature and a microcode patch, this function finds if the
+ * microcode patch has matching family and model with the CPU.
+ */
+static enum ucode_state
+matching_model_microcode(struct microcode_header_intel *mc_header,
+			unsigned long sig)
+{
+	u8 x86, x86_model;
+	u8 x86_ucode, x86_model_ucode;
+	struct extended_sigtable *ext_header;
+	unsigned long total_size = get_totalsize(mc_header);
+	unsigned long data_size = get_datasize(mc_header);
+	int ext_sigcount, i;
+	struct extended_signature *ext_sig;
+
+	x86 = get_x86_family(sig);
+	x86_model = get_x86_model(sig);
+
+	x86_ucode = get_x86_family(mc_header->sig);
+	x86_model_ucode = get_x86_model(mc_header->sig);
+
+	if (x86 == x86_ucode && x86_model == x86_model_ucode)
+		return UCODE_OK;
+
+	/* Look for ext. headers: */
+	if (total_size <= data_size + MC_HEADER_SIZE)
+		return UCODE_NFOUND;
+
+	ext_header = (struct extended_sigtable *)
+		     mc_header + data_size + MC_HEADER_SIZE;
+	ext_sigcount = ext_header->count;
+	ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
+
+	for (i = 0; i < ext_sigcount; i++) {
+		x86_ucode = get_x86_family(ext_sig->sig);
+		x86_model_ucode = get_x86_model(ext_sig->sig);
+
+		if (x86 == x86_ucode && x86_model == x86_model_ucode)
+			return UCODE_OK;
+
+		ext_sig++;
+	}
+
+	return UCODE_NFOUND;
+}
+
+static int
+save_microcode(struct mc_saved_data *mc_saved_data,
+	       struct microcode_intel **mc_saved_src,
+	       unsigned int mc_saved_count)
+{
+	int i, j;
+	struct microcode_intel **mc_saved_p;
+	int ret;
+
+	if (!mc_saved_count)
+		return -EINVAL;
+
+	/*
+	 * Copy new microcode data.
+	 */
+	mc_saved_p = kmalloc(mc_saved_count*sizeof(struct microcode_intel *),
+			     GFP_KERNEL);
+	if (!mc_saved_p)
+		return -ENOMEM;
+
+	for (i = 0; i < mc_saved_count; i++) {
+		struct microcode_intel *mc = mc_saved_src[i];
+		struct microcode_header_intel *mc_header = &mc->hdr;
+		unsigned long mc_size = get_totalsize(mc_header);
+		mc_saved_p[i] = kmalloc(mc_size, GFP_KERNEL);
+		if (!mc_saved_p[i]) {
+			ret = -ENOMEM;
+			goto err;
+		}
+		if (!mc_saved_src[i]) {
+			ret = -EINVAL;
+			goto err;
+		}
+		memcpy(mc_saved_p[i], mc, mc_size);
+	}
+
+	/*
+	 * Point to newly saved microcode.
+	 */
+	mc_saved_data->mc_saved = mc_saved_p;
+	mc_saved_data->mc_saved_count = mc_saved_count;
+
+	return 0;
+
+err:
+	for (j = 0; j <= i; j++)
+		kfree(mc_saved_p[j]);
+	kfree(mc_saved_p);
+
+	return ret;
+}
+
+/*
+ * A microcode patch in ucode_ptr is saved into mc_saved
+ * - if it has matching signature and newer revision compared to an existing
+ *   patch mc_saved.
+ * - or if it is a newly discovered microcode patch.
+ *
+ * The microcode patch should have matching model with CPU.
+ */
+static void _save_mc(struct microcode_intel **mc_saved, u8 *ucode_ptr,
+		     unsigned int *mc_saved_count_p)
+{
+	int i;
+	int found = 0;
+	unsigned int mc_saved_count = *mc_saved_count_p;
+	struct microcode_header_intel *mc_header;
+
+	mc_header = (struct microcode_header_intel *)ucode_ptr;
+	for (i = 0; i < mc_saved_count; i++) {
+		unsigned int sig, pf;
+		unsigned int new_rev;
+		struct microcode_header_intel *mc_saved_header =
+			     (struct microcode_header_intel *)mc_saved[i];
+		sig = mc_saved_header->sig;
+		pf = mc_saved_header->pf;
+		new_rev = mc_header->rev;
+
+		if (get_matching_sig(sig, pf, ucode_ptr, new_rev)) {
+			found = 1;
+			if (update_match_revision(mc_header, new_rev)) {
+				/*
+				 * Found an older ucode saved before.
+				 * Replace the older one with this newer
+				 * one.
+				 */
+				mc_saved[i] =
+					(struct microcode_intel *)ucode_ptr;
+				break;
+			}
+		}
+	}
+	if (i >= mc_saved_count && !found)
+		/*
+		 * This ucode is first time discovered in ucode file.
+		 * Save it to memory.
+		 */
+		mc_saved[mc_saved_count++] =
+				 (struct microcode_intel *)ucode_ptr;
+
+	*mc_saved_count_p = mc_saved_count;
+}
+
+/*
+ * Get microcode matching with BSP's model. Only CPUs with the same model as
+ * BSP can stay in the platform.
+ */
+static enum ucode_state __init
+get_matching_model_microcode(int cpu, unsigned long start,
+			     void *data, size_t size,
+			     struct mc_saved_data *mc_saved_data,
+			     unsigned long *mc_saved_in_initrd,
+			     struct ucode_cpu_info *uci)
+{
+	u8 *ucode_ptr = data;
+	unsigned int leftover = size;
+	enum ucode_state state = UCODE_OK;
+	unsigned int mc_size;
+	struct microcode_header_intel *mc_header;
+	struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
+	unsigned int mc_saved_count = mc_saved_data->mc_saved_count;
+	int i;
+
+	while (leftover) {
+		mc_header = (struct microcode_header_intel *)ucode_ptr;
+
+		mc_size = get_totalsize(mc_header);
+		if (!mc_size || mc_size > leftover ||
+			microcode_sanity_check(ucode_ptr, 0) < 0)
+			break;
+
+		leftover -= mc_size;
+
+		/*
+		 * Since APs with same family and model as the BSP may boot in
+		 * the platform, we need to find and save microcode patches
+		 * with the same family and model as the BSP.
+		 */
+		if (matching_model_microcode(mc_header, uci->cpu_sig.sig) !=
+			 UCODE_OK) {
+			ucode_ptr += mc_size;
+			continue;
+		}
+
+		_save_mc(mc_saved_tmp, ucode_ptr, &mc_saved_count);
+
+		ucode_ptr += mc_size;
+	}
+
+	if (leftover) {
+		state = UCODE_ERROR;
+		goto out;
+	}
+
+	if (mc_saved_count == 0) {
+		state = UCODE_NFOUND;
+		goto out;
+	}
+
+	for (i = 0; i < mc_saved_count; i++)
+		mc_saved_in_initrd[i] = (unsigned long)mc_saved_tmp[i] - start;
+
+	mc_saved_data->mc_saved_count = mc_saved_count;
+out:
+	return state;
+}
+
+static int collect_cpu_info_early(struct ucode_cpu_info *uci)
+{
+	unsigned int val[2];
+	u8 x86, x86_model;
+	struct cpu_signature csig;
+	unsigned int eax, ebx, ecx, edx;
+
+	csig.sig = 0;
+	csig.pf = 0;
+	csig.rev = 0;
+
+	memset(uci, 0, sizeof(*uci));
+
+	eax = 0x00000001;
+	ecx = 0;
+	native_cpuid(&eax, &ebx, &ecx, &edx);
+	csig.sig = eax;
+
+	x86 = get_x86_family(csig.sig);
+	x86_model = get_x86_model(csig.sig);
+
+	if ((x86_model >= 5) || (x86 > 6)) {
+		/* get processor flags from MSR 0x17 */
+		native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
+		csig.pf = 1 << ((val[1] >> 18) & 7);
+	}
+	native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+
+	/* As documented in the SDM: Do a CPUID 1 here */
+	sync_core();
+
+	/* get the current revision from MSR 0x8B */
+	native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
+
+	csig.rev = val[1];
+
+	uci->cpu_sig = csig;
+	uci->valid = 1;
+
+	return 0;
+}
+
+#ifdef DEBUG
+static void __ref show_saved_mc(void)
+{
+	int i, j;
+	unsigned int sig, pf, rev, total_size, data_size, date;
+	struct ucode_cpu_info uci;
+
+	if (mc_saved_data.mc_saved_count == 0) {
+		pr_debug("no micorcode data saved.\n");
+		return;
+	}
+	pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count);
+
+	collect_cpu_info_early(&uci);
+
+	sig = uci.cpu_sig.sig;
+	pf = uci.cpu_sig.pf;
+	rev = uci.cpu_sig.rev;
+	pr_debug("CPU%d: sig=0x%x, pf=0x%x, rev=0x%x\n",
+		 smp_processor_id(), sig, pf, rev);
+
+	for (i = 0; i < mc_saved_data.mc_saved_count; i++) {
+		struct microcode_header_intel *mc_saved_header;
+		struct extended_sigtable *ext_header;
+		int ext_sigcount;
+		struct extended_signature *ext_sig;
+
+		mc_saved_header = (struct microcode_header_intel *)
+				  mc_saved_data.mc_saved[i];
+		sig = mc_saved_header->sig;
+		pf = mc_saved_header->pf;
+		rev = mc_saved_header->rev;
+		total_size = get_totalsize(mc_saved_header);
+		data_size = get_datasize(mc_saved_header);
+		date = mc_saved_header->date;
+
+		pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, toal size=0x%x, date = %04x-%02x-%02x\n",
+			 i, sig, pf, rev, total_size,
+			 date & 0xffff,
+			 date >> 24,
+			 (date >> 16) & 0xff);
+
+		/* Look for ext. headers: */
+		if (total_size <= data_size + MC_HEADER_SIZE)
+			continue;
+
+		ext_header = (struct extended_sigtable *)
+			     mc_saved_header + data_size + MC_HEADER_SIZE;
+		ext_sigcount = ext_header->count;
+		ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
+
+		for (j = 0; j < ext_sigcount; j++) {
+			sig = ext_sig->sig;
+			pf = ext_sig->pf;
+
+			pr_debug("\tExtended[%d]: sig=0x%x, pf=0x%x\n",
+				 j, sig, pf);
+
+			ext_sig++;
+		}
+
+	}
+}
+#else
+static inline void show_saved_mc(void)
+{
+}
+#endif
+
+#if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU)
+static DEFINE_MUTEX(x86_cpu_microcode_mutex);
+/*
+ * Save this mc into mc_saved_data. So it will be loaded early when a CPU is
+ * hot added or resumes.
+ *
+ * Please make sure this mc should be a valid microcode patch before calling
+ * this function.
+ */
+int save_mc_for_early(u8 *mc)
+{
+	struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
+	unsigned int mc_saved_count_init;
+	unsigned int mc_saved_count;
+	struct microcode_intel **mc_saved;
+	int ret = 0;
+	int i;
+
+	/*
+	 * Hold hotplug lock so mc_saved_data is not accessed by a CPU in
+	 * hotplug.
+	 */
+	mutex_lock(&x86_cpu_microcode_mutex);
+
+	mc_saved_count_init = mc_saved_data.mc_saved_count;
+	mc_saved_count = mc_saved_data.mc_saved_count;
+	mc_saved = mc_saved_data.mc_saved;
+
+	if (mc_saved && mc_saved_count)
+		memcpy(mc_saved_tmp, mc_saved,
+		       mc_saved_count * sizeof(struct mirocode_intel *));
+	/*
+	 * Save the microcode patch mc in mc_save_tmp structure if it's a newer
+	 * version.
+	 */
+
+	_save_mc(mc_saved_tmp, mc, &mc_saved_count);
+
+	/*
+	 * Save the mc_save_tmp in global mc_saved_data.
+	 */
+	ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count);
+	if (ret) {
+		pr_err("Cannot save microcode patch.\n");
+		goto out;
+	}
+
+	show_saved_mc();
+
+	/*
+	 * Free old saved microcod data.
+	 */
+	if (mc_saved) {
+		for (i = 0; i < mc_saved_count_init; i++)
+			kfree(mc_saved[i]);
+		kfree(mc_saved);
+	}
+
+out:
+	mutex_unlock(&x86_cpu_microcode_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(save_mc_for_early);
+#endif
+
+static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin";
+static __init enum ucode_state
+scan_microcode(unsigned long start, unsigned long end,
+		struct mc_saved_data *mc_saved_data,
+		unsigned long *mc_saved_in_initrd,
+		struct ucode_cpu_info *uci)
+{
+	unsigned int size = end - start + 1;
+	struct cpio_data cd;
+	long offset = 0;
+#ifdef CONFIG_X86_32
+	char *p = (char *)__pa_nodebug(ucode_name);
+#else
+	char *p = ucode_name;
+#endif
+
+	cd.data = NULL;
+	cd.size = 0;
+
+	cd = find_cpio_data(p, (void *)start, size, &offset);
+	if (!cd.data)
+		return UCODE_ERROR;
+
+
+	return get_matching_model_microcode(0, start, cd.data, cd.size,
+					    mc_saved_data, mc_saved_in_initrd,
+					    uci);
+}
+
+/*
+ * Print ucode update info.
+ */
+static void
+print_ucode_info(struct ucode_cpu_info *uci, unsigned int date)
+{
+	int cpu = smp_processor_id();
+
+	pr_info("CPU%d microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n",
+		cpu,
+		uci->cpu_sig.rev,
+		date & 0xffff,
+		date >> 24,
+		(date >> 16) & 0xff);
+}
+
+#ifdef CONFIG_X86_32
+
+static int delay_ucode_info;
+static int current_mc_date;
+
+/*
+ * Print early updated ucode info after printk works. This is delayed info dump.
+ */
+void show_ucode_info_early(void)
+{
+	struct ucode_cpu_info uci;
+
+	if (delay_ucode_info) {
+		collect_cpu_info_early(&uci);
+		print_ucode_info(&uci, current_mc_date);
+		delay_ucode_info = 0;
+	}
+}
+
+/*
+ * At this point, we can not call printk() yet. Keep microcode patch number in
+ * mc_saved_data.mc_saved and delay printing microcode info in
+ * show_ucode_info_early() until printk() works.
+ */
+static void print_ucode(struct ucode_cpu_info *uci)
+{
+	struct microcode_intel *mc_intel;
+	int *delay_ucode_info_p;
+	int *current_mc_date_p;
+
+	mc_intel = uci->mc;
+	if (mc_intel == NULL)
+		return;
+
+	delay_ucode_info_p = (int *)__pa_nodebug(&delay_ucode_info);
+	current_mc_date_p = (int *)__pa_nodebug(&current_mc_date);
+
+	*delay_ucode_info_p = 1;
+	*current_mc_date_p = mc_intel->hdr.date;
+}
+#else
+
+/*
+ * Flush global tlb. We only do this in x86_64 where paging has been enabled
+ * already and PGE should be enabled as well.
+ */
+static inline void flush_tlb_early(void)
+{
+	__native_flush_tlb_global_irq_disabled();
+}
+
+static inline void print_ucode(struct ucode_cpu_info *uci)
+{
+	struct microcode_intel *mc_intel;
+
+	mc_intel = uci->mc;
+	if (mc_intel == NULL)
+		return;
+
+	print_ucode_info(uci, mc_intel->hdr.date);
+}
+#endif
+
+static int apply_microcode_early(struct mc_saved_data *mc_saved_data,
+				 struct ucode_cpu_info *uci)
+{
+	struct microcode_intel *mc_intel;
+	unsigned int val[2];
+
+	mc_intel = uci->mc;
+	if (mc_intel == NULL)
+		return 0;
+
+	/* write microcode via MSR 0x79 */
+	native_wrmsr(MSR_IA32_UCODE_WRITE,
+	      (unsigned long) mc_intel->bits,
+	      (unsigned long) mc_intel->bits >> 16 >> 16);
+	native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+
+	/* As documented in the SDM: Do a CPUID 1 here */
+	sync_core();
+
+	/* get the current revision from MSR 0x8B */
+	native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
+	if (val[1] != mc_intel->hdr.rev)
+		return -1;
+
+#ifdef CONFIG_X86_64
+	/* Flush global tlb. This is precaution. */
+	flush_tlb_early();
+#endif
+	uci->cpu_sig.rev = val[1];
+
+	print_ucode(uci);
+
+	return 0;
+}
+
+/*
+ * This function converts microcode patch offsets previously stored in
+ * mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data.
+ */
+int __init save_microcode_in_initrd_intel(void)
+{
+	unsigned int count = mc_saved_data.mc_saved_count;
+	struct microcode_intel *mc_saved[MAX_UCODE_COUNT];
+	int ret = 0;
+
+	if (count == 0)
+		return ret;
+
+	microcode_pointer(mc_saved, mc_saved_in_initrd, initrd_start, count);
+	ret = save_microcode(&mc_saved_data, mc_saved, count);
+	if (ret)
+		pr_err("Cannot save microcode patches from initrd.\n");
+
+	show_saved_mc();
+
+	return ret;
+}
+
+static void __init
+_load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data,
+		      unsigned long *mc_saved_in_initrd,
+		      unsigned long initrd_start_early,
+		      unsigned long initrd_end_early,
+		      struct ucode_cpu_info *uci)
+{
+	collect_cpu_info_early(uci);
+	scan_microcode(initrd_start_early, initrd_end_early, mc_saved_data,
+		       mc_saved_in_initrd, uci);
+	load_microcode(mc_saved_data, mc_saved_in_initrd,
+		       initrd_start_early, uci);
+	apply_microcode_early(mc_saved_data, uci);
+}
+
+void __init
+load_ucode_intel_bsp(void)
+{
+	u64 ramdisk_image, ramdisk_size;
+	unsigned long initrd_start_early, initrd_end_early;
+	struct ucode_cpu_info uci;
+#ifdef CONFIG_X86_32
+	struct boot_params *boot_params_p;
+
+	boot_params_p = (struct boot_params *)__pa_nodebug(&boot_params);
+	ramdisk_image = boot_params_p->hdr.ramdisk_image;
+	ramdisk_size  = boot_params_p->hdr.ramdisk_size;
+	initrd_start_early = ramdisk_image;
+	initrd_end_early = initrd_start_early + ramdisk_size;
+
+	_load_ucode_intel_bsp(
+		(struct mc_saved_data *)__pa_nodebug(&mc_saved_data),
+		(unsigned long *)__pa_nodebug(&mc_saved_in_initrd),
+		initrd_start_early, initrd_end_early, &uci);
+#else
+	ramdisk_image = boot_params.hdr.ramdisk_image;
+	ramdisk_size  = boot_params.hdr.ramdisk_size;
+	initrd_start_early = ramdisk_image + PAGE_OFFSET;
+	initrd_end_early = initrd_start_early + ramdisk_size;
+
+	_load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd,
+			      initrd_start_early, initrd_end_early, &uci);
+#endif
+}
+
+void load_ucode_intel_ap(void)
+{
+	struct mc_saved_data *mc_saved_data_p;
+	struct ucode_cpu_info uci;
+	unsigned long *mc_saved_in_initrd_p;
+	unsigned long initrd_start_addr;
+#ifdef CONFIG_X86_32
+	unsigned long *initrd_start_p;
+
+	mc_saved_in_initrd_p =
+		(unsigned long *)__pa_nodebug(mc_saved_in_initrd);
+	mc_saved_data_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data);
+	initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start);
+	initrd_start_addr = (unsigned long)__pa_nodebug(*initrd_start_p);
+#else
+	mc_saved_data_p = &mc_saved_data;
+	mc_saved_in_initrd_p = mc_saved_in_initrd;
+	initrd_start_addr = initrd_start;
+#endif
+
+	/*
+	 * If there is no valid ucode previously saved in memory, no need to
+	 * update ucode on this AP.
+	 */
+	if (mc_saved_data_p->mc_saved_count == 0)
+		return;
+
+	collect_cpu_info_early(&uci);
+	load_microcode(mc_saved_data_p, mc_saved_in_initrd_p,
+		       initrd_start_addr, &uci);
+	apply_microcode_early(mc_saved_data_p, &uci);
+}
diff --git a/arch/x86/kernel/cpu/microcode/intel_lib.c b/arch/x86/kernel/cpu/microcode/intel_lib.c
new file mode 100644
index 000000000000..ce69320d0179
--- /dev/null
+++ b/arch/x86/kernel/cpu/microcode/intel_lib.c
@@ -0,0 +1,174 @@
+/*
+ *	Intel CPU Microcode Update Driver for Linux
+ *
+ *	Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
+ *			   H Peter Anvin" <hpa@zytor.com>
+ *
+ *	This driver allows to upgrade microcode on Intel processors
+ *	belonging to IA-32 family - PentiumPro, Pentium II,
+ *	Pentium III, Xeon, Pentium 4, etc.
+ *
+ *	Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture
+ *	Software Developer's Manual
+ *	Order Number 253668 or free download from:
+ *
+ *	http://developer.intel.com/Assets/PDF/manual/253668.pdf
+ *
+ *	For more information, go to http://www.urbanmyth.org/microcode
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ */
+#include <linux/firmware.h>
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/microcode_intel.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+
+static inline int
+update_match_cpu(unsigned int csig, unsigned int cpf,
+		 unsigned int sig, unsigned int pf)
+{
+	return (!sigmatch(sig, csig, pf, cpf)) ? 0 : 1;
+}
+
+int
+update_match_revision(struct microcode_header_intel *mc_header, int rev)
+{
+	return (mc_header->rev <= rev) ? 0 : 1;
+}
+
+int microcode_sanity_check(void *mc, int print_err)
+{
+	unsigned long total_size, data_size, ext_table_size;
+	struct microcode_header_intel *mc_header = mc;
+	struct extended_sigtable *ext_header = NULL;
+	int sum, orig_sum, ext_sigcount = 0, i;
+	struct extended_signature *ext_sig;
+
+	total_size = get_totalsize(mc_header);
+	data_size = get_datasize(mc_header);
+
+	if (data_size + MC_HEADER_SIZE > total_size) {
+		if (print_err)
+			pr_err("error! Bad data size in microcode data file\n");
+		return -EINVAL;
+	}
+
+	if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
+		if (print_err)
+			pr_err("error! Unknown microcode update format\n");
+		return -EINVAL;
+	}
+	ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
+	if (ext_table_size) {
+		if ((ext_table_size < EXT_HEADER_SIZE)
+		 || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
+			if (print_err)
+				pr_err("error! Small exttable size in microcode data file\n");
+			return -EINVAL;
+		}
+		ext_header = mc + MC_HEADER_SIZE + data_size;
+		if (ext_table_size != exttable_size(ext_header)) {
+			if (print_err)
+				pr_err("error! Bad exttable size in microcode data file\n");
+			return -EFAULT;
+		}
+		ext_sigcount = ext_header->count;
+	}
+
+	/* check extended table checksum */
+	if (ext_table_size) {
+		int ext_table_sum = 0;
+		int *ext_tablep = (int *)ext_header;
+
+		i = ext_table_size / DWSIZE;
+		while (i--)
+			ext_table_sum += ext_tablep[i];
+		if (ext_table_sum) {
+			if (print_err)
+				pr_warn("aborting, bad extended signature table checksum\n");
+			return -EINVAL;
+		}
+	}
+
+	/* calculate the checksum */
+	orig_sum = 0;
+	i = (MC_HEADER_SIZE + data_size) / DWSIZE;
+	while (i--)
+		orig_sum += ((int *)mc)[i];
+	if (orig_sum) {
+		if (print_err)
+			pr_err("aborting, bad checksum\n");
+		return -EINVAL;
+	}
+	if (!ext_table_size)
+		return 0;
+	/* check extended signature checksum */
+	for (i = 0; i < ext_sigcount; i++) {
+		ext_sig = (void *)ext_header + EXT_HEADER_SIZE +
+			  EXT_SIGNATURE_SIZE * i;
+		sum = orig_sum
+			- (mc_header->sig + mc_header->pf + mc_header->cksum)
+			+ (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
+		if (sum) {
+			if (print_err)
+				pr_err("aborting, bad checksum\n");
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(microcode_sanity_check);
+
+/*
+ * return 0 - no update found
+ * return 1 - found update
+ */
+int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev)
+{
+	struct microcode_header_intel *mc_header = mc;
+	struct extended_sigtable *ext_header;
+	unsigned long total_size = get_totalsize(mc_header);
+	int ext_sigcount, i;
+	struct extended_signature *ext_sig;
+
+	if (update_match_cpu(csig, cpf, mc_header->sig, mc_header->pf))
+		return 1;
+
+	/* Look for ext. headers: */
+	if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE)
+		return 0;
+
+	ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE;
+	ext_sigcount = ext_header->count;
+	ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
+
+	for (i = 0; i < ext_sigcount; i++) {
+		if (update_match_cpu(csig, cpf, ext_sig->sig, ext_sig->pf))
+			return 1;
+		ext_sig++;
+	}
+	return 0;
+}
+
+/*
+ * return 0 - no update found
+ * return 1 - found update
+ */
+int get_matching_microcode(unsigned int csig, int cpf, void *mc, int rev)
+{
+	struct microcode_header_intel *mc_header = mc;
+
+	if (!update_match_revision(mc_header, rev))
+		return 0;
+
+	return get_matching_sig(csig, cpf, mc, rev);
+}
+EXPORT_SYMBOL_GPL(get_matching_microcode);
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
deleted file mode 100644
index 4a6ff747aaad..000000000000
--- a/arch/x86/kernel/microcode_amd.c
+++ /dev/null
@@ -1,492 +0,0 @@
-/*
- *  AMD CPU Microcode Update Driver for Linux
- *  Copyright (C) 2008-2011 Advanced Micro Devices Inc.
- *
- *  Author: Peter Oruba <peter.oruba@amd.com>
- *
- *  Based on work by:
- *  Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
- *
- *  Maintainers:
- *  Andreas Herrmann <herrmann.der.user@googlemail.com>
- *  Borislav Petkov <bp@alien8.de>
- *
- *  This driver allows to upgrade microcode on F10h AMD
- *  CPUs and later.
- *
- *  Licensed under the terms of the GNU General Public
- *  License version 2. See file COPYING for details.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/firmware.h>
-#include <linux/pci_ids.h>
-#include <linux/uaccess.h>
-#include <linux/vmalloc.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-
-#include <asm/microcode.h>
-#include <asm/processor.h>
-#include <asm/msr.h>
-#include <asm/microcode_amd.h>
-
-MODULE_DESCRIPTION("AMD Microcode Update Driver");
-MODULE_AUTHOR("Peter Oruba");
-MODULE_LICENSE("GPL v2");
-
-static struct equiv_cpu_entry *equiv_cpu_table;
-
-struct ucode_patch {
-	struct list_head plist;
-	void *data;
-	u32 patch_id;
-	u16 equiv_cpu;
-};
-
-static LIST_HEAD(pcache);
-
-static u16 __find_equiv_id(unsigned int cpu)
-{
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-	return find_equiv_id(equiv_cpu_table, uci->cpu_sig.sig);
-}
-
-static u32 find_cpu_family_by_equiv_cpu(u16 equiv_cpu)
-{
-	int i = 0;
-
-	BUG_ON(!equiv_cpu_table);
-
-	while (equiv_cpu_table[i].equiv_cpu != 0) {
-		if (equiv_cpu == equiv_cpu_table[i].equiv_cpu)
-			return equiv_cpu_table[i].installed_cpu;
-		i++;
-	}
-	return 0;
-}
-
-/*
- * a small, trivial cache of per-family ucode patches
- */
-static struct ucode_patch *cache_find_patch(u16 equiv_cpu)
-{
-	struct ucode_patch *p;
-
-	list_for_each_entry(p, &pcache, plist)
-		if (p->equiv_cpu == equiv_cpu)
-			return p;
-	return NULL;
-}
-
-static void update_cache(struct ucode_patch *new_patch)
-{
-	struct ucode_patch *p;
-
-	list_for_each_entry(p, &pcache, plist) {
-		if (p->equiv_cpu == new_patch->equiv_cpu) {
-			if (p->patch_id >= new_patch->patch_id)
-				/* we already have the latest patch */
-				return;
-
-			list_replace(&p->plist, &new_patch->plist);
-			kfree(p->data);
-			kfree(p);
-			return;
-		}
-	}
-	/* no patch found, add it */
-	list_add_tail(&new_patch->plist, &pcache);
-}
-
-static void free_cache(void)
-{
-	struct ucode_patch *p, *tmp;
-
-	list_for_each_entry_safe(p, tmp, &pcache, plist) {
-		__list_del(p->plist.prev, p->plist.next);
-		kfree(p->data);
-		kfree(p);
-	}
-}
-
-static struct ucode_patch *find_patch(unsigned int cpu)
-{
-	u16 equiv_id;
-
-	equiv_id = __find_equiv_id(cpu);
-	if (!equiv_id)
-		return NULL;
-
-	return cache_find_patch(equiv_id);
-}
-
-static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
-{
-	struct cpuinfo_x86 *c = &cpu_data(cpu);
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-	struct ucode_patch *p;
-
-	csig->sig = cpuid_eax(0x00000001);
-	csig->rev = c->microcode;
-
-	/*
-	 * a patch could have been loaded early, set uci->mc so that
-	 * mc_bp_resume() can call apply_microcode()
-	 */
-	p = find_patch(cpu);
-	if (p && (p->patch_id == csig->rev))
-		uci->mc = p->data;
-
-	pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev);
-
-	return 0;
-}
-
-static unsigned int verify_patch_size(u8 family, u32 patch_size,
-				      unsigned int size)
-{
-	u32 max_size;
-
-#define F1XH_MPB_MAX_SIZE 2048
-#define F14H_MPB_MAX_SIZE 1824
-#define F15H_MPB_MAX_SIZE 4096
-#define F16H_MPB_MAX_SIZE 3458
-
-	switch (family) {
-	case 0x14:
-		max_size = F14H_MPB_MAX_SIZE;
-		break;
-	case 0x15:
-		max_size = F15H_MPB_MAX_SIZE;
-		break;
-	case 0x16:
-		max_size = F16H_MPB_MAX_SIZE;
-		break;
-	default:
-		max_size = F1XH_MPB_MAX_SIZE;
-		break;
-	}
-
-	if (patch_size > min_t(u32, size, max_size)) {
-		pr_err("patch size mismatch\n");
-		return 0;
-	}
-
-	return patch_size;
-}
-
-int __apply_microcode_amd(struct microcode_amd *mc_amd)
-{
-	u32 rev, dummy;
-
-	native_wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
-
-	/* verify patch application was successful */
-	native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
-	if (rev != mc_amd->hdr.patch_id)
-		return -1;
-
-	return 0;
-}
-
-int apply_microcode_amd(int cpu)
-{
-	struct cpuinfo_x86 *c = &cpu_data(cpu);
-	struct microcode_amd *mc_amd;
-	struct ucode_cpu_info *uci;
-	struct ucode_patch *p;
-	u32 rev, dummy;
-
-	BUG_ON(raw_smp_processor_id() != cpu);
-
-	uci = ucode_cpu_info + cpu;
-
-	p = find_patch(cpu);
-	if (!p)
-		return 0;
-
-	mc_amd  = p->data;
-	uci->mc = p->data;
-
-	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
-
-	/* need to apply patch? */
-	if (rev >= mc_amd->hdr.patch_id) {
-		c->microcode = rev;
-		uci->cpu_sig.rev = rev;
-		return 0;
-	}
-
-	if (__apply_microcode_amd(mc_amd)) {
-		pr_err("CPU%d: update failed for patch_level=0x%08x\n",
-			cpu, mc_amd->hdr.patch_id);
-		return -1;
-	}
-	pr_info("CPU%d: new patch_level=0x%08x\n", cpu,
-		mc_amd->hdr.patch_id);
-
-	uci->cpu_sig.rev = mc_amd->hdr.patch_id;
-	c->microcode = mc_amd->hdr.patch_id;
-
-	return 0;
-}
-
-static int install_equiv_cpu_table(const u8 *buf)
-{
-	unsigned int *ibuf = (unsigned int *)buf;
-	unsigned int type = ibuf[1];
-	unsigned int size = ibuf[2];
-
-	if (type != UCODE_EQUIV_CPU_TABLE_TYPE || !size) {
-		pr_err("empty section/"
-		       "invalid type field in container file section header\n");
-		return -EINVAL;
-	}
-
-	equiv_cpu_table = vmalloc(size);
-	if (!equiv_cpu_table) {
-		pr_err("failed to allocate equivalent CPU table\n");
-		return -ENOMEM;
-	}
-
-	memcpy(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size);
-
-	/* add header length */
-	return size + CONTAINER_HDR_SZ;
-}
-
-static void free_equiv_cpu_table(void)
-{
-	vfree(equiv_cpu_table);
-	equiv_cpu_table = NULL;
-}
-
-static void cleanup(void)
-{
-	free_equiv_cpu_table();
-	free_cache();
-}
-
-/*
- * We return the current size even if some of the checks failed so that
- * we can skip over the next patch. If we return a negative value, we
- * signal a grave error like a memory allocation has failed and the
- * driver cannot continue functioning normally. In such cases, we tear
- * down everything we've used up so far and exit.
- */
-static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover)
-{
-	struct microcode_header_amd *mc_hdr;
-	struct ucode_patch *patch;
-	unsigned int patch_size, crnt_size, ret;
-	u32 proc_fam;
-	u16 proc_id;
-
-	patch_size  = *(u32 *)(fw + 4);
-	crnt_size   = patch_size + SECTION_HDR_SIZE;
-	mc_hdr	    = (struct microcode_header_amd *)(fw + SECTION_HDR_SIZE);
-	proc_id	    = mc_hdr->processor_rev_id;
-
-	proc_fam = find_cpu_family_by_equiv_cpu(proc_id);
-	if (!proc_fam) {
-		pr_err("No patch family for equiv ID: 0x%04x\n", proc_id);
-		return crnt_size;
-	}
-
-	/* check if patch is for the current family */
-	proc_fam = ((proc_fam >> 8) & 0xf) + ((proc_fam >> 20) & 0xff);
-	if (proc_fam != family)
-		return crnt_size;
-
-	if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) {
-		pr_err("Patch-ID 0x%08x: chipset-specific code unsupported.\n",
-			mc_hdr->patch_id);
-		return crnt_size;
-	}
-
-	ret = verify_patch_size(family, patch_size, leftover);
-	if (!ret) {
-		pr_err("Patch-ID 0x%08x: size mismatch.\n", mc_hdr->patch_id);
-		return crnt_size;
-	}
-
-	patch = kzalloc(sizeof(*patch), GFP_KERNEL);
-	if (!patch) {
-		pr_err("Patch allocation failure.\n");
-		return -EINVAL;
-	}
-
-	patch->data = kzalloc(patch_size, GFP_KERNEL);
-	if (!patch->data) {
-		pr_err("Patch data allocation failure.\n");
-		kfree(patch);
-		return -EINVAL;
-	}
-
-	/* All looks ok, copy patch... */
-	memcpy(patch->data, fw + SECTION_HDR_SIZE, patch_size);
-	INIT_LIST_HEAD(&patch->plist);
-	patch->patch_id  = mc_hdr->patch_id;
-	patch->equiv_cpu = proc_id;
-
-	pr_debug("%s: Added patch_id: 0x%08x, proc_id: 0x%04x\n",
-		 __func__, patch->patch_id, proc_id);
-
-	/* ... and add to cache. */
-	update_cache(patch);
-
-	return crnt_size;
-}
-
-static enum ucode_state __load_microcode_amd(u8 family, const u8 *data,
-					     size_t size)
-{
-	enum ucode_state ret = UCODE_ERROR;
-	unsigned int leftover;
-	u8 *fw = (u8 *)data;
-	int crnt_size = 0;
-	int offset;
-
-	offset = install_equiv_cpu_table(data);
-	if (offset < 0) {
-		pr_err("failed to create equivalent cpu table\n");
-		return ret;
-	}
-	fw += offset;
-	leftover = size - offset;
-
-	if (*(u32 *)fw != UCODE_UCODE_TYPE) {
-		pr_err("invalid type field in container file section header\n");
-		free_equiv_cpu_table();
-		return ret;
-	}
-
-	while (leftover) {
-		crnt_size = verify_and_add_patch(family, fw, leftover);
-		if (crnt_size < 0)
-			return ret;
-
-		fw	 += crnt_size;
-		leftover -= crnt_size;
-	}
-
-	return UCODE_OK;
-}
-
-enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size)
-{
-	enum ucode_state ret;
-
-	/* free old equiv table */
-	free_equiv_cpu_table();
-
-	ret = __load_microcode_amd(family, data, size);
-
-	if (ret != UCODE_OK)
-		cleanup();
-
-#if defined(CONFIG_MICROCODE_AMD_EARLY) && defined(CONFIG_X86_32)
-	/* save BSP's matching patch for early load */
-	if (cpu_data(smp_processor_id()).cpu_index == boot_cpu_data.cpu_index) {
-		struct ucode_patch *p = find_patch(smp_processor_id());
-		if (p) {
-			memset(amd_ucode_patch, 0, PATCH_MAX_SIZE);
-			memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data),
-							       PATCH_MAX_SIZE));
-		}
-	}
-#endif
-	return ret;
-}
-
-/*
- * AMD microcode firmware naming convention, up to family 15h they are in
- * the legacy file:
- *
- *    amd-ucode/microcode_amd.bin
- *
- * This legacy file is always smaller than 2K in size.
- *
- * Beginning with family 15h, they are in family-specific firmware files:
- *
- *    amd-ucode/microcode_amd_fam15h.bin
- *    amd-ucode/microcode_amd_fam16h.bin
- *    ...
- *
- * These might be larger than 2K.
- */
-static enum ucode_state request_microcode_amd(int cpu, struct device *device,
-					      bool refresh_fw)
-{
-	char fw_name[36] = "amd-ucode/microcode_amd.bin";
-	struct cpuinfo_x86 *c = &cpu_data(cpu);
-	enum ucode_state ret = UCODE_NFOUND;
-	const struct firmware *fw;
-
-	/* reload ucode container only on the boot cpu */
-	if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index)
-		return UCODE_OK;
-
-	if (c->x86 >= 0x15)
-		snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86);
-
-	if (request_firmware(&fw, (const char *)fw_name, device)) {
-		pr_debug("failed to load file %s\n", fw_name);
-		goto out;
-	}
-
-	ret = UCODE_ERROR;
-	if (*(u32 *)fw->data != UCODE_MAGIC) {
-		pr_err("invalid magic value (0x%08x)\n", *(u32 *)fw->data);
-		goto fw_release;
-	}
-
-	ret = load_microcode_amd(c->x86, fw->data, fw->size);
-
- fw_release:
-	release_firmware(fw);
-
- out:
-	return ret;
-}
-
-static enum ucode_state
-request_microcode_user(int cpu, const void __user *buf, size_t size)
-{
-	return UCODE_ERROR;
-}
-
-static void microcode_fini_cpu_amd(int cpu)
-{
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-
-	uci->mc = NULL;
-}
-
-static struct microcode_ops microcode_amd_ops = {
-	.request_microcode_user           = request_microcode_user,
-	.request_microcode_fw             = request_microcode_amd,
-	.collect_cpu_info                 = collect_cpu_info_amd,
-	.apply_microcode                  = apply_microcode_amd,
-	.microcode_fini_cpu               = microcode_fini_cpu_amd,
-};
-
-struct microcode_ops * __init init_amd_microcode(void)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-
-	if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
-		pr_warning("AMD CPU family 0x%x not supported\n", c->x86);
-		return NULL;
-	}
-
-	return &microcode_amd_ops;
-}
-
-void __exit exit_amd_microcode(void)
-{
-	cleanup();
-}
diff --git a/arch/x86/kernel/microcode_amd_early.c b/arch/x86/kernel/microcode_amd_early.c
deleted file mode 100644
index 8384c0fa206f..000000000000
--- a/arch/x86/kernel/microcode_amd_early.c
+++ /dev/null
@@ -1,380 +0,0 @@
-/*
- * Copyright (C) 2013 Advanced Micro Devices, Inc.
- *
- * Author: Jacob Shin <jacob.shin@amd.com>
- * Fixes: Borislav Petkov <bp@suse.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/earlycpio.h>
-#include <linux/initrd.h>
-
-#include <asm/cpu.h>
-#include <asm/setup.h>
-#include <asm/microcode_amd.h>
-
-/*
- * This points to the current valid container of microcode patches which we will
- * save from the initrd before jettisoning its contents.
- */
-static u8 *container;
-static size_t container_size;
-
-static u32 ucode_new_rev;
-u8 amd_ucode_patch[PATCH_MAX_SIZE];
-static u16 this_equiv_id;
-
-struct cpio_data ucode_cpio;
-
-/*
- * Microcode patch container file is prepended to the initrd in cpio format.
- * See Documentation/x86/early-microcode.txt
- */
-static __initdata char ucode_path[] = "kernel/x86/microcode/AuthenticAMD.bin";
-
-static struct cpio_data __init find_ucode_in_initrd(void)
-{
-	long offset = 0;
-	char *path;
-	void *start;
-	size_t size;
-
-#ifdef CONFIG_X86_32
-	struct boot_params *p;
-
-	/*
-	 * On 32-bit, early load occurs before paging is turned on so we need
-	 * to use physical addresses.
-	 */
-	p       = (struct boot_params *)__pa_nodebug(&boot_params);
-	path    = (char *)__pa_nodebug(ucode_path);
-	start   = (void *)p->hdr.ramdisk_image;
-	size    = p->hdr.ramdisk_size;
-#else
-	path    = ucode_path;
-	start   = (void *)(boot_params.hdr.ramdisk_image + PAGE_OFFSET);
-	size    = boot_params.hdr.ramdisk_size;
-#endif
-
-	return find_cpio_data(path, start, size, &offset);
-}
-
-static size_t compute_container_size(u8 *data, u32 total_size)
-{
-	size_t size = 0;
-	u32 *header = (u32 *)data;
-
-	if (header[0] != UCODE_MAGIC ||
-	    header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */
-	    header[2] == 0)                            /* size */
-		return size;
-
-	size = header[2] + CONTAINER_HDR_SZ;
-	total_size -= size;
-	data += size;
-
-	while (total_size) {
-		u16 patch_size;
-
-		header = (u32 *)data;
-
-		if (header[0] != UCODE_UCODE_TYPE)
-			break;
-
-		/*
-		 * Sanity-check patch size.
-		 */
-		patch_size = header[1];
-		if (patch_size > PATCH_MAX_SIZE)
-			break;
-
-		size	   += patch_size + SECTION_HDR_SIZE;
-		data	   += patch_size + SECTION_HDR_SIZE;
-		total_size -= patch_size + SECTION_HDR_SIZE;
-	}
-
-	return size;
-}
-
-/*
- * Early load occurs before we can vmalloc(). So we look for the microcode
- * patch container file in initrd, traverse equivalent cpu table, look for a
- * matching microcode patch, and update, all in initrd memory in place.
- * When vmalloc() is available for use later -- on 64-bit during first AP load,
- * and on 32-bit during save_microcode_in_initrd_amd() -- we can call
- * load_microcode_amd() to save equivalent cpu table and microcode patches in
- * kernel heap memory.
- */
-static void apply_ucode_in_initrd(void *ucode, size_t size)
-{
-	struct equiv_cpu_entry *eq;
-	size_t *cont_sz;
-	u32 *header;
-	u8  *data, **cont;
-	u16 eq_id = 0;
-	int offset, left;
-	u32 rev, eax, ebx, ecx, edx;
-	u32 *new_rev;
-
-#ifdef CONFIG_X86_32
-	new_rev = (u32 *)__pa_nodebug(&ucode_new_rev);
-	cont_sz = (size_t *)__pa_nodebug(&container_size);
-	cont	= (u8 **)__pa_nodebug(&container);
-#else
-	new_rev = &ucode_new_rev;
-	cont_sz = &container_size;
-	cont	= &container;
-#endif
-
-	data   = ucode;
-	left   = size;
-	header = (u32 *)data;
-
-	/* find equiv cpu table */
-	if (header[0] != UCODE_MAGIC ||
-	    header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */
-	    header[2] == 0)                            /* size */
-		return;
-
-	eax = 0x00000001;
-	ecx = 0;
-	native_cpuid(&eax, &ebx, &ecx, &edx);
-
-	while (left > 0) {
-		eq = (struct equiv_cpu_entry *)(data + CONTAINER_HDR_SZ);
-
-		*cont = data;
-
-		/* Advance past the container header */
-		offset = header[2] + CONTAINER_HDR_SZ;
-		data  += offset;
-		left  -= offset;
-
-		eq_id = find_equiv_id(eq, eax);
-		if (eq_id) {
-			this_equiv_id = eq_id;
-			*cont_sz = compute_container_size(*cont, left + offset);
-
-			/*
-			 * truncate how much we need to iterate over in the
-			 * ucode update loop below
-			 */
-			left = *cont_sz - offset;
-			break;
-		}
-
-		/*
-		 * support multiple container files appended together. if this
-		 * one does not have a matching equivalent cpu entry, we fast
-		 * forward to the next container file.
-		 */
-		while (left > 0) {
-			header = (u32 *)data;
-			if (header[0] == UCODE_MAGIC &&
-			    header[1] == UCODE_EQUIV_CPU_TABLE_TYPE)
-				break;
-
-			offset = header[1] + SECTION_HDR_SIZE;
-			data  += offset;
-			left  -= offset;
-		}
-
-		/* mark where the next microcode container file starts */
-		offset    = data - (u8 *)ucode;
-		ucode     = data;
-	}
-
-	if (!eq_id) {
-		*cont = NULL;
-		*cont_sz = 0;
-		return;
-	}
-
-	/* find ucode and update if needed */
-
-	native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax);
-
-	while (left > 0) {
-		struct microcode_amd *mc;
-
-		header = (u32 *)data;
-		if (header[0] != UCODE_UCODE_TYPE || /* type */
-		    header[1] == 0)                  /* size */
-			break;
-
-		mc = (struct microcode_amd *)(data + SECTION_HDR_SIZE);
-
-		if (eq_id == mc->hdr.processor_rev_id && rev < mc->hdr.patch_id) {
-
-			if (!__apply_microcode_amd(mc)) {
-				rev = mc->hdr.patch_id;
-				*new_rev = rev;
-
-				/* save ucode patch */
-				memcpy(amd_ucode_patch, mc,
-				       min_t(u32, header[1], PATCH_MAX_SIZE));
-			}
-		}
-
-		offset  = header[1] + SECTION_HDR_SIZE;
-		data   += offset;
-		left   -= offset;
-	}
-}
-
-void __init load_ucode_amd_bsp(void)
-{
-	struct cpio_data cp;
-	void **data;
-	size_t *size;
-
-#ifdef CONFIG_X86_32
-	data =  (void **)__pa_nodebug(&ucode_cpio.data);
-	size = (size_t *)__pa_nodebug(&ucode_cpio.size);
-#else
-	data = &ucode_cpio.data;
-	size = &ucode_cpio.size;
-#endif
-
-	cp = find_ucode_in_initrd();
-	if (!cp.data)
-		return;
-
-	*data = cp.data;
-	*size = cp.size;
-
-	apply_ucode_in_initrd(cp.data, cp.size);
-}
-
-#ifdef CONFIG_X86_32
-/*
- * On 32-bit, since AP's early load occurs before paging is turned on, we
- * cannot traverse cpu_equiv_table and pcache in kernel heap memory. So during
- * cold boot, AP will apply_ucode_in_initrd() just like the BSP. During
- * save_microcode_in_initrd_amd() BSP's patch is copied to amd_ucode_patch,
- * which is used upon resume from suspend.
- */
-void load_ucode_amd_ap(void)
-{
-	struct microcode_amd *mc;
-	size_t *usize;
-	void **ucode;
-
-	mc = (struct microcode_amd *)__pa(amd_ucode_patch);
-	if (mc->hdr.patch_id && mc->hdr.processor_rev_id) {
-		__apply_microcode_amd(mc);
-		return;
-	}
-
-	ucode = (void *)__pa_nodebug(&container);
-	usize = (size_t *)__pa_nodebug(&container_size);
-
-	if (!*ucode || !*usize)
-		return;
-
-	apply_ucode_in_initrd(*ucode, *usize);
-}
-
-static void __init collect_cpu_sig_on_bsp(void *arg)
-{
-	unsigned int cpu = smp_processor_id();
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-
-	uci->cpu_sig.sig = cpuid_eax(0x00000001);
-}
-#else
-void load_ucode_amd_ap(void)
-{
-	unsigned int cpu = smp_processor_id();
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-	struct equiv_cpu_entry *eq;
-	struct microcode_amd *mc;
-	u32 rev, eax;
-	u16 eq_id;
-
-	/* Exit if called on the BSP. */
-	if (!cpu)
-		return;
-
-	if (!container)
-		return;
-
-	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax);
-
-	uci->cpu_sig.rev = rev;
-	uci->cpu_sig.sig = eax;
-
-	eax = cpuid_eax(0x00000001);
-	eq  = (struct equiv_cpu_entry *)(container + CONTAINER_HDR_SZ);
-
-	eq_id = find_equiv_id(eq, eax);
-	if (!eq_id)
-		return;
-
-	if (eq_id == this_equiv_id) {
-		mc = (struct microcode_amd *)amd_ucode_patch;
-
-		if (mc && rev < mc->hdr.patch_id) {
-			if (!__apply_microcode_amd(mc))
-				ucode_new_rev = mc->hdr.patch_id;
-		}
-
-	} else {
-		if (!ucode_cpio.data)
-			return;
-
-		/*
-		 * AP has a different equivalence ID than BSP, looks like
-		 * mixed-steppings silicon so go through the ucode blob anew.
-		 */
-		apply_ucode_in_initrd(ucode_cpio.data, ucode_cpio.size);
-	}
-}
-#endif
-
-int __init save_microcode_in_initrd_amd(void)
-{
-	enum ucode_state ret;
-	u32 eax;
-
-#ifdef CONFIG_X86_32
-	unsigned int bsp = boot_cpu_data.cpu_index;
-	struct ucode_cpu_info *uci = ucode_cpu_info + bsp;
-
-	if (!uci->cpu_sig.sig)
-		smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1);
-
-	/*
-	 * Take into account the fact that the ramdisk might get relocated
-	 * and therefore we need to recompute the container's position in
-	 * virtual memory space.
-	 */
-	container = (u8 *)(__va((u32)relocated_ramdisk) +
-			   ((u32)container - boot_params.hdr.ramdisk_image));
-#endif
-	if (ucode_new_rev)
-		pr_info("microcode: updated early to new patch_level=0x%08x\n",
-			ucode_new_rev);
-
-	if (!container)
-		return -EINVAL;
-
-	eax   = cpuid_eax(0x00000001);
-	eax   = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
-
-	ret = load_microcode_amd(eax, container, container_size);
-	if (ret != UCODE_OK)
-		return -EINVAL;
-
-	/*
-	 * This will be freed any msec now, stash patches for the current
-	 * family and switch to patch cache for cpu hotplug, etc later.
-	 */
-	container = NULL;
-	container_size = 0;
-
-	return 0;
-}
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
deleted file mode 100644
index 15c987698b0f..000000000000
--- a/arch/x86/kernel/microcode_core.c
+++ /dev/null
@@ -1,645 +0,0 @@
-/*
- *	Intel CPU Microcode Update Driver for Linux
- *
- *	Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
- *		      2006	Shaohua Li <shaohua.li@intel.com>
- *
- *	This driver allows to upgrade microcode on Intel processors
- *	belonging to IA-32 family - PentiumPro, Pentium II,
- *	Pentium III, Xeon, Pentium 4, etc.
- *
- *	Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture
- *	Software Developer's Manual
- *	Order Number 253668 or free download from:
- *
- *	http://developer.intel.com/Assets/PDF/manual/253668.pdf	
- *
- *	For more information, go to http://www.urbanmyth.org/microcode
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
- *	1.0	16 Feb 2000, Tigran Aivazian <tigran@sco.com>
- *		Initial release.
- *	1.01	18 Feb 2000, Tigran Aivazian <tigran@sco.com>
- *		Added read() support + cleanups.
- *	1.02	21 Feb 2000, Tigran Aivazian <tigran@sco.com>
- *		Added 'device trimming' support. open(O_WRONLY) zeroes
- *		and frees the saved copy of applied microcode.
- *	1.03	29 Feb 2000, Tigran Aivazian <tigran@sco.com>
- *		Made to use devfs (/dev/cpu/microcode) + cleanups.
- *	1.04	06 Jun 2000, Simon Trimmer <simon@veritas.com>
- *		Added misc device support (now uses both devfs and misc).
- *		Added MICROCODE_IOCFREE ioctl to clear memory.
- *	1.05	09 Jun 2000, Simon Trimmer <simon@veritas.com>
- *		Messages for error cases (non Intel & no suitable microcode).
- *	1.06	03 Aug 2000, Tigran Aivazian <tigran@veritas.com>
- *		Removed ->release(). Removed exclusive open and status bitmap.
- *		Added microcode_rwsem to serialize read()/write()/ioctl().
- *		Removed global kernel lock usage.
- *	1.07	07 Sep 2000, Tigran Aivazian <tigran@veritas.com>
- *		Write 0 to 0x8B msr and then cpuid before reading revision,
- *		so that it works even if there were no update done by the
- *		BIOS. Otherwise, reading from 0x8B gives junk (which happened
- *		to be 0 on my machine which is why it worked even when I
- *		disabled update by the BIOS)
- *		Thanks to Eric W. Biederman <ebiederman@lnxi.com> for the fix.
- *	1.08	11 Dec 2000, Richard Schaal <richard.schaal@intel.com> and
- *			     Tigran Aivazian <tigran@veritas.com>
- *		Intel Pentium 4 processor support and bugfixes.
- *	1.09	30 Oct 2001, Tigran Aivazian <tigran@veritas.com>
- *		Bugfix for HT (Hyper-Threading) enabled processors
- *		whereby processor resources are shared by all logical processors
- *		in a single CPU package.
- *	1.10	28 Feb 2002 Asit K Mallick <asit.k.mallick@intel.com> and
- *		Tigran Aivazian <tigran@veritas.com>,
- *		Serialize updates as required on HT processors due to
- *		speculative nature of implementation.
- *	1.11	22 Mar 2002 Tigran Aivazian <tigran@veritas.com>
- *		Fix the panic when writing zero-length microcode chunk.
- *	1.12	29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>,
- *		Jun Nakajima <jun.nakajima@intel.com>
- *		Support for the microcode updates in the new format.
- *	1.13	10 Oct 2003 Tigran Aivazian <tigran@veritas.com>
- *		Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl
- *		because we no longer hold a copy of applied microcode
- *		in kernel memory.
- *	1.14	25 Jun 2004 Tigran Aivazian <tigran@veritas.com>
- *		Fix sigmatch() macro to handle old CPUs with pf == 0.
- *		Thanks to Stuart Swales for pointing out this bug.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/platform_device.h>
-#include <linux/miscdevice.h>
-#include <linux/capability.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/cpu.h>
-#include <linux/fs.h>
-#include <linux/mm.h>
-#include <linux/syscore_ops.h>
-
-#include <asm/microcode.h>
-#include <asm/processor.h>
-#include <asm/cpu_device_id.h>
-#include <asm/perf_event.h>
-
-MODULE_DESCRIPTION("Microcode Update Driver");
-MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
-MODULE_LICENSE("GPL");
-
-#define MICROCODE_VERSION	"2.00"
-
-static struct microcode_ops	*microcode_ops;
-
-/*
- * Synchronization.
- *
- * All non cpu-hotplug-callback call sites use:
- *
- * - microcode_mutex to synchronize with each other;
- * - get/put_online_cpus() to synchronize with
- *   the cpu-hotplug-callback call sites.
- *
- * We guarantee that only a single cpu is being
- * updated at any particular moment of time.
- */
-static DEFINE_MUTEX(microcode_mutex);
-
-struct ucode_cpu_info		ucode_cpu_info[NR_CPUS];
-EXPORT_SYMBOL_GPL(ucode_cpu_info);
-
-/*
- * Operations that are run on a target cpu:
- */
-
-struct cpu_info_ctx {
-	struct cpu_signature	*cpu_sig;
-	int			err;
-};
-
-static void collect_cpu_info_local(void *arg)
-{
-	struct cpu_info_ctx *ctx = arg;
-
-	ctx->err = microcode_ops->collect_cpu_info(smp_processor_id(),
-						   ctx->cpu_sig);
-}
-
-static int collect_cpu_info_on_target(int cpu, struct cpu_signature *cpu_sig)
-{
-	struct cpu_info_ctx ctx = { .cpu_sig = cpu_sig, .err = 0 };
-	int ret;
-
-	ret = smp_call_function_single(cpu, collect_cpu_info_local, &ctx, 1);
-	if (!ret)
-		ret = ctx.err;
-
-	return ret;
-}
-
-static int collect_cpu_info(int cpu)
-{
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-	int ret;
-
-	memset(uci, 0, sizeof(*uci));
-
-	ret = collect_cpu_info_on_target(cpu, &uci->cpu_sig);
-	if (!ret)
-		uci->valid = 1;
-
-	return ret;
-}
-
-struct apply_microcode_ctx {
-	int err;
-};
-
-static void apply_microcode_local(void *arg)
-{
-	struct apply_microcode_ctx *ctx = arg;
-
-	ctx->err = microcode_ops->apply_microcode(smp_processor_id());
-}
-
-static int apply_microcode_on_target(int cpu)
-{
-	struct apply_microcode_ctx ctx = { .err = 0 };
-	int ret;
-
-	ret = smp_call_function_single(cpu, apply_microcode_local, &ctx, 1);
-	if (!ret)
-		ret = ctx.err;
-
-	return ret;
-}
-
-#ifdef CONFIG_MICROCODE_OLD_INTERFACE
-static int do_microcode_update(const void __user *buf, size_t size)
-{
-	int error = 0;
-	int cpu;
-
-	for_each_online_cpu(cpu) {
-		struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-		enum ucode_state ustate;
-
-		if (!uci->valid)
-			continue;
-
-		ustate = microcode_ops->request_microcode_user(cpu, buf, size);
-		if (ustate == UCODE_ERROR) {
-			error = -1;
-			break;
-		} else if (ustate == UCODE_OK)
-			apply_microcode_on_target(cpu);
-	}
-
-	return error;
-}
-
-static int microcode_open(struct inode *inode, struct file *file)
-{
-	return capable(CAP_SYS_RAWIO) ? nonseekable_open(inode, file) : -EPERM;
-}
-
-static ssize_t microcode_write(struct file *file, const char __user *buf,
-			       size_t len, loff_t *ppos)
-{
-	ssize_t ret = -EINVAL;
-
-	if ((len >> PAGE_SHIFT) > totalram_pages) {
-		pr_err("too much data (max %ld pages)\n", totalram_pages);
-		return ret;
-	}
-
-	get_online_cpus();
-	mutex_lock(&microcode_mutex);
-
-	if (do_microcode_update(buf, len) == 0)
-		ret = (ssize_t)len;
-
-	if (ret > 0)
-		perf_check_microcode();
-
-	mutex_unlock(&microcode_mutex);
-	put_online_cpus();
-
-	return ret;
-}
-
-static const struct file_operations microcode_fops = {
-	.owner			= THIS_MODULE,
-	.write			= microcode_write,
-	.open			= microcode_open,
-	.llseek		= no_llseek,
-};
-
-static struct miscdevice microcode_dev = {
-	.minor			= MICROCODE_MINOR,
-	.name			= "microcode",
-	.nodename		= "cpu/microcode",
-	.fops			= &microcode_fops,
-};
-
-static int __init microcode_dev_init(void)
-{
-	int error;
-
-	error = misc_register(&microcode_dev);
-	if (error) {
-		pr_err("can't misc_register on minor=%d\n", MICROCODE_MINOR);
-		return error;
-	}
-
-	return 0;
-}
-
-static void __exit microcode_dev_exit(void)
-{
-	misc_deregister(&microcode_dev);
-}
-
-MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
-MODULE_ALIAS("devname:cpu/microcode");
-#else
-#define microcode_dev_init()	0
-#define microcode_dev_exit()	do { } while (0)
-#endif
-
-/* fake device for request_firmware */
-static struct platform_device	*microcode_pdev;
-
-static int reload_for_cpu(int cpu)
-{
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-	enum ucode_state ustate;
-	int err = 0;
-
-	if (!uci->valid)
-		return err;
-
-	ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true);
-	if (ustate == UCODE_OK)
-		apply_microcode_on_target(cpu);
-	else
-		if (ustate == UCODE_ERROR)
-			err = -EINVAL;
-	return err;
-}
-
-static ssize_t reload_store(struct device *dev,
-			    struct device_attribute *attr,
-			    const char *buf, size_t size)
-{
-	unsigned long val;
-	int cpu;
-	ssize_t ret = 0, tmp_ret;
-
-	ret = kstrtoul(buf, 0, &val);
-	if (ret)
-		return ret;
-
-	if (val != 1)
-		return size;
-
-	get_online_cpus();
-	mutex_lock(&microcode_mutex);
-	for_each_online_cpu(cpu) {
-		tmp_ret = reload_for_cpu(cpu);
-		if (tmp_ret != 0)
-			pr_warn("Error reloading microcode on CPU %d\n", cpu);
-
-		/* save retval of the first encountered reload error */
-		if (!ret)
-			ret = tmp_ret;
-	}
-	if (!ret)
-		perf_check_microcode();
-	mutex_unlock(&microcode_mutex);
-	put_online_cpus();
-
-	if (!ret)
-		ret = size;
-
-	return ret;
-}
-
-static ssize_t version_show(struct device *dev,
-			struct device_attribute *attr, char *buf)
-{
-	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
-
-	return sprintf(buf, "0x%x\n", uci->cpu_sig.rev);
-}
-
-static ssize_t pf_show(struct device *dev,
-			struct device_attribute *attr, char *buf)
-{
-	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
-
-	return sprintf(buf, "0x%x\n", uci->cpu_sig.pf);
-}
-
-static DEVICE_ATTR(reload, 0200, NULL, reload_store);
-static DEVICE_ATTR(version, 0400, version_show, NULL);
-static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL);
-
-static struct attribute *mc_default_attrs[] = {
-	&dev_attr_version.attr,
-	&dev_attr_processor_flags.attr,
-	NULL
-};
-
-static struct attribute_group mc_attr_group = {
-	.attrs			= mc_default_attrs,
-	.name			= "microcode",
-};
-
-static void microcode_fini_cpu(int cpu)
-{
-	microcode_ops->microcode_fini_cpu(cpu);
-}
-
-static enum ucode_state microcode_resume_cpu(int cpu)
-{
-	pr_debug("CPU%d updated upon resume\n", cpu);
-
-	if (apply_microcode_on_target(cpu))
-		return UCODE_ERROR;
-
-	return UCODE_OK;
-}
-
-static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw)
-{
-	enum ucode_state ustate;
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-
-	if (uci && uci->valid)
-		return UCODE_OK;
-
-	if (collect_cpu_info(cpu))
-		return UCODE_ERROR;
-
-	/* --dimm. Trigger a delayed update? */
-	if (system_state != SYSTEM_RUNNING)
-		return UCODE_NFOUND;
-
-	ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev,
-						     refresh_fw);
-
-	if (ustate == UCODE_OK) {
-		pr_debug("CPU%d updated upon init\n", cpu);
-		apply_microcode_on_target(cpu);
-	}
-
-	return ustate;
-}
-
-static enum ucode_state microcode_update_cpu(int cpu)
-{
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-
-	if (uci->valid)
-		return microcode_resume_cpu(cpu);
-
-	return microcode_init_cpu(cpu, false);
-}
-
-static int mc_device_add(struct device *dev, struct subsys_interface *sif)
-{
-	int err, cpu = dev->id;
-
-	if (!cpu_online(cpu))
-		return 0;
-
-	pr_debug("CPU%d added\n", cpu);
-
-	err = sysfs_create_group(&dev->kobj, &mc_attr_group);
-	if (err)
-		return err;
-
-	if (microcode_init_cpu(cpu, true) == UCODE_ERROR)
-		return -EINVAL;
-
-	return err;
-}
-
-static int mc_device_remove(struct device *dev, struct subsys_interface *sif)
-{
-	int cpu = dev->id;
-
-	if (!cpu_online(cpu))
-		return 0;
-
-	pr_debug("CPU%d removed\n", cpu);
-	microcode_fini_cpu(cpu);
-	sysfs_remove_group(&dev->kobj, &mc_attr_group);
-	return 0;
-}
-
-static struct subsys_interface mc_cpu_interface = {
-	.name			= "microcode",
-	.subsys			= &cpu_subsys,
-	.add_dev		= mc_device_add,
-	.remove_dev		= mc_device_remove,
-};
-
-/**
- * mc_bp_resume - Update boot CPU microcode during resume.
- */
-static void mc_bp_resume(void)
-{
-	int cpu = smp_processor_id();
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-
-	if (uci->valid && uci->mc)
-		microcode_ops->apply_microcode(cpu);
-}
-
-static struct syscore_ops mc_syscore_ops = {
-	.resume			= mc_bp_resume,
-};
-
-static int
-mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
-{
-	unsigned int cpu = (unsigned long)hcpu;
-	struct device *dev;
-
-	dev = get_cpu_device(cpu);
-
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_ONLINE:
-		microcode_update_cpu(cpu);
-		pr_debug("CPU%d added\n", cpu);
-		/*
-		 * "break" is missing on purpose here because we want to fall
-		 * through in order to create the sysfs group.
-		 */
-
-	case CPU_DOWN_FAILED:
-		if (sysfs_create_group(&dev->kobj, &mc_attr_group))
-			pr_err("Failed to create group for CPU%d\n", cpu);
-		break;
-
-	case CPU_DOWN_PREPARE:
-		/* Suspend is in progress, only remove the interface */
-		sysfs_remove_group(&dev->kobj, &mc_attr_group);
-		pr_debug("CPU%d removed\n", cpu);
-		break;
-
-	/*
-	 * case CPU_DEAD:
-	 *
-	 * When a CPU goes offline, don't free up or invalidate the copy of
-	 * the microcode in kernel memory, so that we can reuse it when the
-	 * CPU comes back online without unnecessarily requesting the userspace
-	 * for it again.
-	 */
-	}
-
-	/* The CPU refused to come up during a system resume */
-	if (action == CPU_UP_CANCELED_FROZEN)
-		microcode_fini_cpu(cpu);
-
-	return NOTIFY_OK;
-}
-
-static struct notifier_block __refdata mc_cpu_notifier = {
-	.notifier_call	= mc_cpu_callback,
-};
-
-#ifdef MODULE
-/* Autoload on Intel and AMD systems */
-static const struct x86_cpu_id __initconst microcode_id[] = {
-#ifdef CONFIG_MICROCODE_INTEL
-	{ X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, },
-#endif
-#ifdef CONFIG_MICROCODE_AMD
-	{ X86_VENDOR_AMD, X86_FAMILY_ANY, X86_MODEL_ANY, },
-#endif
-	{}
-};
-MODULE_DEVICE_TABLE(x86cpu, microcode_id);
-#endif
-
-static struct attribute *cpu_root_microcode_attrs[] = {
-	&dev_attr_reload.attr,
-	NULL
-};
-
-static struct attribute_group cpu_root_microcode_group = {
-	.name  = "microcode",
-	.attrs = cpu_root_microcode_attrs,
-};
-
-static int __init microcode_init(void)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-	int error;
-
-	if (c->x86_vendor == X86_VENDOR_INTEL)
-		microcode_ops = init_intel_microcode();
-	else if (c->x86_vendor == X86_VENDOR_AMD)
-		microcode_ops = init_amd_microcode();
-	else
-		pr_err("no support for this CPU vendor\n");
-
-	if (!microcode_ops)
-		return -ENODEV;
-
-	microcode_pdev = platform_device_register_simple("microcode", -1,
-							 NULL, 0);
-	if (IS_ERR(microcode_pdev))
-		return PTR_ERR(microcode_pdev);
-
-	get_online_cpus();
-	mutex_lock(&microcode_mutex);
-
-	error = subsys_interface_register(&mc_cpu_interface);
-	if (!error)
-		perf_check_microcode();
-	mutex_unlock(&microcode_mutex);
-	put_online_cpus();
-
-	if (error)
-		goto out_pdev;
-
-	error = sysfs_create_group(&cpu_subsys.dev_root->kobj,
-				   &cpu_root_microcode_group);
-
-	if (error) {
-		pr_err("Error creating microcode group!\n");
-		goto out_driver;
-	}
-
-	error = microcode_dev_init();
-	if (error)
-		goto out_ucode_group;
-
-	register_syscore_ops(&mc_syscore_ops);
-	register_hotcpu_notifier(&mc_cpu_notifier);
-
-	pr_info("Microcode Update Driver: v" MICROCODE_VERSION
-		" <tigran@aivazian.fsnet.co.uk>, Peter Oruba\n");
-
-	return 0;
-
- out_ucode_group:
-	sysfs_remove_group(&cpu_subsys.dev_root->kobj,
-			   &cpu_root_microcode_group);
-
- out_driver:
-	get_online_cpus();
-	mutex_lock(&microcode_mutex);
-
-	subsys_interface_unregister(&mc_cpu_interface);
-
-	mutex_unlock(&microcode_mutex);
-	put_online_cpus();
-
- out_pdev:
-	platform_device_unregister(microcode_pdev);
-	return error;
-
-}
-module_init(microcode_init);
-
-static void __exit microcode_exit(void)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-
-	microcode_dev_exit();
-
-	unregister_hotcpu_notifier(&mc_cpu_notifier);
-	unregister_syscore_ops(&mc_syscore_ops);
-
-	sysfs_remove_group(&cpu_subsys.dev_root->kobj,
-			   &cpu_root_microcode_group);
-
-	get_online_cpus();
-	mutex_lock(&microcode_mutex);
-
-	subsys_interface_unregister(&mc_cpu_interface);
-
-	mutex_unlock(&microcode_mutex);
-	put_online_cpus();
-
-	platform_device_unregister(microcode_pdev);
-
-	microcode_ops = NULL;
-
-	if (c->x86_vendor == X86_VENDOR_AMD)
-		exit_amd_microcode();
-
-	pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
-}
-module_exit(microcode_exit);
diff --git a/arch/x86/kernel/microcode_core_early.c b/arch/x86/kernel/microcode_core_early.c
deleted file mode 100644
index be7f8514f577..000000000000
--- a/arch/x86/kernel/microcode_core_early.c
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- *	X86 CPU microcode early update for Linux
- *
- *	Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
- *			   H Peter Anvin" <hpa@zytor.com>
- *
- *	This driver allows to early upgrade microcode on Intel processors
- *	belonging to IA-32 family - PentiumPro, Pentium II,
- *	Pentium III, Xeon, Pentium 4, etc.
- *
- *	Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture
- *	Software Developer's Manual.
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- */
-#include <linux/module.h>
-#include <asm/microcode_intel.h>
-#include <asm/microcode_amd.h>
-#include <asm/processor.h>
-
-#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))
-#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u')
-#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I')
-#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l')
-#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h')
-#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i')
-#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D')
-
-#define CPUID_IS(a, b, c, ebx, ecx, edx)	\
-		(!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c))))
-
-/*
- * In early loading microcode phase on BSP, boot_cpu_data is not set up yet.
- * x86_vendor() gets vendor id for BSP.
- *
- * In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify
- * coding, we still use x86_vendor() to get vendor id for AP.
- *
- * x86_vendor() gets vendor information directly through cpuid.
- */
-static int x86_vendor(void)
-{
-	u32 eax = 0x00000000;
-	u32 ebx, ecx = 0, edx;
-
-	native_cpuid(&eax, &ebx, &ecx, &edx);
-
-	if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx))
-		return X86_VENDOR_INTEL;
-
-	if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx))
-		return X86_VENDOR_AMD;
-
-	return X86_VENDOR_UNKNOWN;
-}
-
-static int x86_family(void)
-{
-	u32 eax = 0x00000001;
-	u32 ebx, ecx = 0, edx;
-	int x86;
-
-	native_cpuid(&eax, &ebx, &ecx, &edx);
-
-	x86 = (eax >> 8) & 0xf;
-	if (x86 == 15)
-		x86 += (eax >> 20) & 0xff;
-
-	return x86;
-}
-
-void __init load_ucode_bsp(void)
-{
-	int vendor, x86;
-
-	if (!have_cpuid_p())
-		return;
-
-	vendor = x86_vendor();
-	x86 = x86_family();
-
-	switch (vendor) {
-	case X86_VENDOR_INTEL:
-		if (x86 >= 6)
-			load_ucode_intel_bsp();
-		break;
-	case X86_VENDOR_AMD:
-		if (x86 >= 0x10)
-			load_ucode_amd_bsp();
-		break;
-	default:
-		break;
-	}
-}
-
-void load_ucode_ap(void)
-{
-	int vendor, x86;
-
-	if (!have_cpuid_p())
-		return;
-
-	vendor = x86_vendor();
-	x86 = x86_family();
-
-	switch (vendor) {
-	case X86_VENDOR_INTEL:
-		if (x86 >= 6)
-			load_ucode_intel_ap();
-		break;
-	case X86_VENDOR_AMD:
-		if (x86 >= 0x10)
-			load_ucode_amd_ap();
-		break;
-	default:
-		break;
-	}
-}
-
-int __init save_microcode_in_initrd(void)
-{
-	struct cpuinfo_x86 *c = &boot_cpu_data;
-
-	switch (c->x86_vendor) {
-	case X86_VENDOR_INTEL:
-		if (c->x86 >= 6)
-			save_microcode_in_initrd_intel();
-		break;
-	case X86_VENDOR_AMD:
-		if (c->x86 >= 0x10)
-			save_microcode_in_initrd_amd();
-		break;
-	default:
-		break;
-	}
-
-	return 0;
-}
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
deleted file mode 100644
index 5fb2cebf556b..000000000000
--- a/arch/x86/kernel/microcode_intel.c
+++ /dev/null
@@ -1,333 +0,0 @@
-/*
- *	Intel CPU Microcode Update Driver for Linux
- *
- *	Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
- *		      2006	Shaohua Li <shaohua.li@intel.com>
- *
- *	This driver allows to upgrade microcode on Intel processors
- *	belonging to IA-32 family - PentiumPro, Pentium II,
- *	Pentium III, Xeon, Pentium 4, etc.
- *
- *	Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture
- *	Software Developer's Manual
- *	Order Number 253668 or free download from:
- *
- *	http://developer.intel.com/Assets/PDF/manual/253668.pdf	
- *
- *	For more information, go to http://www.urbanmyth.org/microcode
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
- *	1.0	16 Feb 2000, Tigran Aivazian <tigran@sco.com>
- *		Initial release.
- *	1.01	18 Feb 2000, Tigran Aivazian <tigran@sco.com>
- *		Added read() support + cleanups.
- *	1.02	21 Feb 2000, Tigran Aivazian <tigran@sco.com>
- *		Added 'device trimming' support. open(O_WRONLY) zeroes
- *		and frees the saved copy of applied microcode.
- *	1.03	29 Feb 2000, Tigran Aivazian <tigran@sco.com>
- *		Made to use devfs (/dev/cpu/microcode) + cleanups.
- *	1.04	06 Jun 2000, Simon Trimmer <simon@veritas.com>
- *		Added misc device support (now uses both devfs and misc).
- *		Added MICROCODE_IOCFREE ioctl to clear memory.
- *	1.05	09 Jun 2000, Simon Trimmer <simon@veritas.com>
- *		Messages for error cases (non Intel & no suitable microcode).
- *	1.06	03 Aug 2000, Tigran Aivazian <tigran@veritas.com>
- *		Removed ->release(). Removed exclusive open and status bitmap.
- *		Added microcode_rwsem to serialize read()/write()/ioctl().
- *		Removed global kernel lock usage.
- *	1.07	07 Sep 2000, Tigran Aivazian <tigran@veritas.com>
- *		Write 0 to 0x8B msr and then cpuid before reading revision,
- *		so that it works even if there were no update done by the
- *		BIOS. Otherwise, reading from 0x8B gives junk (which happened
- *		to be 0 on my machine which is why it worked even when I
- *		disabled update by the BIOS)
- *		Thanks to Eric W. Biederman <ebiederman@lnxi.com> for the fix.
- *	1.08	11 Dec 2000, Richard Schaal <richard.schaal@intel.com> and
- *			     Tigran Aivazian <tigran@veritas.com>
- *		Intel Pentium 4 processor support and bugfixes.
- *	1.09	30 Oct 2001, Tigran Aivazian <tigran@veritas.com>
- *		Bugfix for HT (Hyper-Threading) enabled processors
- *		whereby processor resources are shared by all logical processors
- *		in a single CPU package.
- *	1.10	28 Feb 2002 Asit K Mallick <asit.k.mallick@intel.com> and
- *		Tigran Aivazian <tigran@veritas.com>,
- *		Serialize updates as required on HT processors due to
- *		speculative nature of implementation.
- *	1.11	22 Mar 2002 Tigran Aivazian <tigran@veritas.com>
- *		Fix the panic when writing zero-length microcode chunk.
- *	1.12	29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>,
- *		Jun Nakajima <jun.nakajima@intel.com>
- *		Support for the microcode updates in the new format.
- *	1.13	10 Oct 2003 Tigran Aivazian <tigran@veritas.com>
- *		Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl
- *		because we no longer hold a copy of applied microcode
- *		in kernel memory.
- *	1.14	25 Jun 2004 Tigran Aivazian <tigran@veritas.com>
- *		Fix sigmatch() macro to handle old CPUs with pf == 0.
- *		Thanks to Stuart Swales for pointing out this bug.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/firmware.h>
-#include <linux/uaccess.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/vmalloc.h>
-
-#include <asm/microcode_intel.h>
-#include <asm/processor.h>
-#include <asm/msr.h>
-
-MODULE_DESCRIPTION("Microcode Update Driver");
-MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
-MODULE_LICENSE("GPL");
-
-static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
-{
-	struct cpuinfo_x86 *c = &cpu_data(cpu_num);
-	unsigned int val[2];
-
-	memset(csig, 0, sizeof(*csig));
-
-	csig->sig = cpuid_eax(0x00000001);
-
-	if ((c->x86_model >= 5) || (c->x86 > 6)) {
-		/* get processor flags from MSR 0x17 */
-		rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
-		csig->pf = 1 << ((val[1] >> 18) & 7);
-	}
-
-	csig->rev = c->microcode;
-	pr_info("CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n",
-		cpu_num, csig->sig, csig->pf, csig->rev);
-
-	return 0;
-}
-
-/*
- * return 0 - no update found
- * return 1 - found update
- */
-static int get_matching_mc(struct microcode_intel *mc_intel, int cpu)
-{
-	struct cpu_signature cpu_sig;
-	unsigned int csig, cpf, crev;
-
-	collect_cpu_info(cpu, &cpu_sig);
-
-	csig = cpu_sig.sig;
-	cpf = cpu_sig.pf;
-	crev = cpu_sig.rev;
-
-	return get_matching_microcode(csig, cpf, mc_intel, crev);
-}
-
-int apply_microcode(int cpu)
-{
-	struct microcode_intel *mc_intel;
-	struct ucode_cpu_info *uci;
-	unsigned int val[2];
-	int cpu_num = raw_smp_processor_id();
-	struct cpuinfo_x86 *c = &cpu_data(cpu_num);
-
-	uci = ucode_cpu_info + cpu;
-	mc_intel = uci->mc;
-
-	/* We should bind the task to the CPU */
-	BUG_ON(cpu_num != cpu);
-
-	if (mc_intel == NULL)
-		return 0;
-
-	/*
-	 * Microcode on this CPU could be updated earlier. Only apply the
-	 * microcode patch in mc_intel when it is newer than the one on this
-	 * CPU.
-	 */
-	if (get_matching_mc(mc_intel, cpu) == 0)
-		return 0;
-
-	/* write microcode via MSR 0x79 */
-	wrmsr(MSR_IA32_UCODE_WRITE,
-	      (unsigned long) mc_intel->bits,
-	      (unsigned long) mc_intel->bits >> 16 >> 16);
-	wrmsr(MSR_IA32_UCODE_REV, 0, 0);
-
-	/* As documented in the SDM: Do a CPUID 1 here */
-	sync_core();
-
-	/* get the current revision from MSR 0x8B */
-	rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
-
-	if (val[1] != mc_intel->hdr.rev) {
-		pr_err("CPU%d update to revision 0x%x failed\n",
-		       cpu_num, mc_intel->hdr.rev);
-		return -1;
-	}
-	pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x\n",
-		cpu_num, val[1],
-		mc_intel->hdr.date & 0xffff,
-		mc_intel->hdr.date >> 24,
-		(mc_intel->hdr.date >> 16) & 0xff);
-
-	uci->cpu_sig.rev = val[1];
-	c->microcode = val[1];
-
-	return 0;
-}
-
-static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
-				int (*get_ucode_data)(void *, const void *, size_t))
-{
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-	u8 *ucode_ptr = data, *new_mc = NULL, *mc = NULL;
-	int new_rev = uci->cpu_sig.rev;
-	unsigned int leftover = size;
-	enum ucode_state state = UCODE_OK;
-	unsigned int curr_mc_size = 0;
-	unsigned int csig, cpf;
-
-	while (leftover) {
-		struct microcode_header_intel mc_header;
-		unsigned int mc_size;
-
-		if (get_ucode_data(&mc_header, ucode_ptr, sizeof(mc_header)))
-			break;
-
-		mc_size = get_totalsize(&mc_header);
-		if (!mc_size || mc_size > leftover) {
-			pr_err("error! Bad data in microcode data file\n");
-			break;
-		}
-
-		/* For performance reasons, reuse mc area when possible */
-		if (!mc || mc_size > curr_mc_size) {
-			vfree(mc);
-			mc = vmalloc(mc_size);
-			if (!mc)
-				break;
-			curr_mc_size = mc_size;
-		}
-
-		if (get_ucode_data(mc, ucode_ptr, mc_size) ||
-		    microcode_sanity_check(mc, 1) < 0) {
-			break;
-		}
-
-		csig = uci->cpu_sig.sig;
-		cpf = uci->cpu_sig.pf;
-		if (get_matching_microcode(csig, cpf, mc, new_rev)) {
-			vfree(new_mc);
-			new_rev = mc_header.rev;
-			new_mc  = mc;
-			mc = NULL;	/* trigger new vmalloc */
-		}
-
-		ucode_ptr += mc_size;
-		leftover  -= mc_size;
-	}
-
-	vfree(mc);
-
-	if (leftover) {
-		vfree(new_mc);
-		state = UCODE_ERROR;
-		goto out;
-	}
-
-	if (!new_mc) {
-		state = UCODE_NFOUND;
-		goto out;
-	}
-
-	vfree(uci->mc);
-	uci->mc = (struct microcode_intel *)new_mc;
-
-	/*
-	 * If early loading microcode is supported, save this mc into
-	 * permanent memory. So it will be loaded early when a CPU is hot added
-	 * or resumes.
-	 */
-	save_mc_for_early(new_mc);
-
-	pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
-		 cpu, new_rev, uci->cpu_sig.rev);
-out:
-	return state;
-}
-
-static int get_ucode_fw(void *to, const void *from, size_t n)
-{
-	memcpy(to, from, n);
-	return 0;
-}
-
-static enum ucode_state request_microcode_fw(int cpu, struct device *device,
-					     bool refresh_fw)
-{
-	char name[30];
-	struct cpuinfo_x86 *c = &cpu_data(cpu);
-	const struct firmware *firmware;
-	enum ucode_state ret;
-
-	sprintf(name, "intel-ucode/%02x-%02x-%02x",
-		c->x86, c->x86_model, c->x86_mask);
-
-	if (request_firmware(&firmware, name, device)) {
-		pr_debug("data file %s load failed\n", name);
-		return UCODE_NFOUND;
-	}
-
-	ret = generic_load_microcode(cpu, (void *)firmware->data,
-				     firmware->size, &get_ucode_fw);
-
-	release_firmware(firmware);
-
-	return ret;
-}
-
-static int get_ucode_user(void *to, const void *from, size_t n)
-{
-	return copy_from_user(to, from, n);
-}
-
-static enum ucode_state
-request_microcode_user(int cpu, const void __user *buf, size_t size)
-{
-	return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user);
-}
-
-static void microcode_fini_cpu(int cpu)
-{
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-
-	vfree(uci->mc);
-	uci->mc = NULL;
-}
-
-static struct microcode_ops microcode_intel_ops = {
-	.request_microcode_user		  = request_microcode_user,
-	.request_microcode_fw             = request_microcode_fw,
-	.collect_cpu_info                 = collect_cpu_info,
-	.apply_microcode                  = apply_microcode,
-	.microcode_fini_cpu               = microcode_fini_cpu,
-};
-
-struct microcode_ops * __init init_intel_microcode(void)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-
-	if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
-	    cpu_has(c, X86_FEATURE_IA64)) {
-		pr_err("Intel CPU family 0x%x not supported\n", c->x86);
-		return NULL;
-	}
-
-	return &microcode_intel_ops;
-}
-
diff --git a/arch/x86/kernel/microcode_intel_early.c b/arch/x86/kernel/microcode_intel_early.c
deleted file mode 100644
index 18f739129e72..000000000000
--- a/arch/x86/kernel/microcode_intel_early.c
+++ /dev/null
@@ -1,787 +0,0 @@
-/*
- *	Intel CPU microcode early update for Linux
- *
- *	Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
- *			   H Peter Anvin" <hpa@zytor.com>
- *
- *	This allows to early upgrade microcode on Intel processors
- *	belonging to IA-32 family - PentiumPro, Pentium II,
- *	Pentium III, Xeon, Pentium 4, etc.
- *
- *	Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture
- *	Software Developer's Manual.
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- */
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/earlycpio.h>
-#include <linux/initrd.h>
-#include <linux/cpu.h>
-#include <asm/msr.h>
-#include <asm/microcode_intel.h>
-#include <asm/processor.h>
-#include <asm/tlbflush.h>
-#include <asm/setup.h>
-
-unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT];
-struct mc_saved_data {
-	unsigned int mc_saved_count;
-	struct microcode_intel **mc_saved;
-} mc_saved_data;
-
-static enum ucode_state
-generic_load_microcode_early(struct microcode_intel **mc_saved_p,
-			     unsigned int mc_saved_count,
-			     struct ucode_cpu_info *uci)
-{
-	struct microcode_intel *ucode_ptr, *new_mc = NULL;
-	int new_rev = uci->cpu_sig.rev;
-	enum ucode_state state = UCODE_OK;
-	unsigned int mc_size;
-	struct microcode_header_intel *mc_header;
-	unsigned int csig = uci->cpu_sig.sig;
-	unsigned int cpf = uci->cpu_sig.pf;
-	int i;
-
-	for (i = 0; i < mc_saved_count; i++) {
-		ucode_ptr = mc_saved_p[i];
-
-		mc_header = (struct microcode_header_intel *)ucode_ptr;
-		mc_size = get_totalsize(mc_header);
-		if (get_matching_microcode(csig, cpf, ucode_ptr, new_rev)) {
-			new_rev = mc_header->rev;
-			new_mc  = ucode_ptr;
-		}
-	}
-
-	if (!new_mc) {
-		state = UCODE_NFOUND;
-		goto out;
-	}
-
-	uci->mc = (struct microcode_intel *)new_mc;
-out:
-	return state;
-}
-
-static void
-microcode_pointer(struct microcode_intel **mc_saved,
-		  unsigned long *mc_saved_in_initrd,
-		  unsigned long initrd_start, int mc_saved_count)
-{
-	int i;
-
-	for (i = 0; i < mc_saved_count; i++)
-		mc_saved[i] = (struct microcode_intel *)
-			      (mc_saved_in_initrd[i] + initrd_start);
-}
-
-#ifdef CONFIG_X86_32
-static void
-microcode_phys(struct microcode_intel **mc_saved_tmp,
-	       struct mc_saved_data *mc_saved_data)
-{
-	int i;
-	struct microcode_intel ***mc_saved;
-
-	mc_saved = (struct microcode_intel ***)
-		   __pa_nodebug(&mc_saved_data->mc_saved);
-	for (i = 0; i < mc_saved_data->mc_saved_count; i++) {
-		struct microcode_intel *p;
-
-		p = *(struct microcode_intel **)
-			__pa_nodebug(mc_saved_data->mc_saved + i);
-		mc_saved_tmp[i] = (struct microcode_intel *)__pa_nodebug(p);
-	}
-}
-#endif
-
-static enum ucode_state
-load_microcode(struct mc_saved_data *mc_saved_data,
-	       unsigned long *mc_saved_in_initrd,
-	       unsigned long initrd_start,
-	       struct ucode_cpu_info *uci)
-{
-	struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
-	unsigned int count = mc_saved_data->mc_saved_count;
-
-	if (!mc_saved_data->mc_saved) {
-		microcode_pointer(mc_saved_tmp, mc_saved_in_initrd,
-				  initrd_start, count);
-
-		return generic_load_microcode_early(mc_saved_tmp, count, uci);
-	} else {
-#ifdef CONFIG_X86_32
-		microcode_phys(mc_saved_tmp, mc_saved_data);
-		return generic_load_microcode_early(mc_saved_tmp, count, uci);
-#else
-		return generic_load_microcode_early(mc_saved_data->mc_saved,
-						    count, uci);
-#endif
-	}
-}
-
-static u8 get_x86_family(unsigned long sig)
-{
-	u8 x86;
-
-	x86 = (sig >> 8) & 0xf;
-
-	if (x86 == 0xf)
-		x86 += (sig >> 20) & 0xff;
-
-	return x86;
-}
-
-static u8 get_x86_model(unsigned long sig)
-{
-	u8 x86, x86_model;
-
-	x86 = get_x86_family(sig);
-	x86_model = (sig >> 4) & 0xf;
-
-	if (x86 == 0x6 || x86 == 0xf)
-		x86_model += ((sig >> 16) & 0xf) << 4;
-
-	return x86_model;
-}
-
-/*
- * Given CPU signature and a microcode patch, this function finds if the
- * microcode patch has matching family and model with the CPU.
- */
-static enum ucode_state
-matching_model_microcode(struct microcode_header_intel *mc_header,
-			unsigned long sig)
-{
-	u8 x86, x86_model;
-	u8 x86_ucode, x86_model_ucode;
-	struct extended_sigtable *ext_header;
-	unsigned long total_size = get_totalsize(mc_header);
-	unsigned long data_size = get_datasize(mc_header);
-	int ext_sigcount, i;
-	struct extended_signature *ext_sig;
-
-	x86 = get_x86_family(sig);
-	x86_model = get_x86_model(sig);
-
-	x86_ucode = get_x86_family(mc_header->sig);
-	x86_model_ucode = get_x86_model(mc_header->sig);
-
-	if (x86 == x86_ucode && x86_model == x86_model_ucode)
-		return UCODE_OK;
-
-	/* Look for ext. headers: */
-	if (total_size <= data_size + MC_HEADER_SIZE)
-		return UCODE_NFOUND;
-
-	ext_header = (struct extended_sigtable *)
-		     mc_header + data_size + MC_HEADER_SIZE;
-	ext_sigcount = ext_header->count;
-	ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
-
-	for (i = 0; i < ext_sigcount; i++) {
-		x86_ucode = get_x86_family(ext_sig->sig);
-		x86_model_ucode = get_x86_model(ext_sig->sig);
-
-		if (x86 == x86_ucode && x86_model == x86_model_ucode)
-			return UCODE_OK;
-
-		ext_sig++;
-	}
-
-	return UCODE_NFOUND;
-}
-
-static int
-save_microcode(struct mc_saved_data *mc_saved_data,
-	       struct microcode_intel **mc_saved_src,
-	       unsigned int mc_saved_count)
-{
-	int i, j;
-	struct microcode_intel **mc_saved_p;
-	int ret;
-
-	if (!mc_saved_count)
-		return -EINVAL;
-
-	/*
-	 * Copy new microcode data.
-	 */
-	mc_saved_p = kmalloc(mc_saved_count*sizeof(struct microcode_intel *),
-			     GFP_KERNEL);
-	if (!mc_saved_p)
-		return -ENOMEM;
-
-	for (i = 0; i < mc_saved_count; i++) {
-		struct microcode_intel *mc = mc_saved_src[i];
-		struct microcode_header_intel *mc_header = &mc->hdr;
-		unsigned long mc_size = get_totalsize(mc_header);
-		mc_saved_p[i] = kmalloc(mc_size, GFP_KERNEL);
-		if (!mc_saved_p[i]) {
-			ret = -ENOMEM;
-			goto err;
-		}
-		if (!mc_saved_src[i]) {
-			ret = -EINVAL;
-			goto err;
-		}
-		memcpy(mc_saved_p[i], mc, mc_size);
-	}
-
-	/*
-	 * Point to newly saved microcode.
-	 */
-	mc_saved_data->mc_saved = mc_saved_p;
-	mc_saved_data->mc_saved_count = mc_saved_count;
-
-	return 0;
-
-err:
-	for (j = 0; j <= i; j++)
-		kfree(mc_saved_p[j]);
-	kfree(mc_saved_p);
-
-	return ret;
-}
-
-/*
- * A microcode patch in ucode_ptr is saved into mc_saved
- * - if it has matching signature and newer revision compared to an existing
- *   patch mc_saved.
- * - or if it is a newly discovered microcode patch.
- *
- * The microcode patch should have matching model with CPU.
- */
-static void _save_mc(struct microcode_intel **mc_saved, u8 *ucode_ptr,
-		     unsigned int *mc_saved_count_p)
-{
-	int i;
-	int found = 0;
-	unsigned int mc_saved_count = *mc_saved_count_p;
-	struct microcode_header_intel *mc_header;
-
-	mc_header = (struct microcode_header_intel *)ucode_ptr;
-	for (i = 0; i < mc_saved_count; i++) {
-		unsigned int sig, pf;
-		unsigned int new_rev;
-		struct microcode_header_intel *mc_saved_header =
-			     (struct microcode_header_intel *)mc_saved[i];
-		sig = mc_saved_header->sig;
-		pf = mc_saved_header->pf;
-		new_rev = mc_header->rev;
-
-		if (get_matching_sig(sig, pf, ucode_ptr, new_rev)) {
-			found = 1;
-			if (update_match_revision(mc_header, new_rev)) {
-				/*
-				 * Found an older ucode saved before.
-				 * Replace the older one with this newer
-				 * one.
-				 */
-				mc_saved[i] =
-					(struct microcode_intel *)ucode_ptr;
-				break;
-			}
-		}
-	}
-	if (i >= mc_saved_count && !found)
-		/*
-		 * This ucode is first time discovered in ucode file.
-		 * Save it to memory.
-		 */
-		mc_saved[mc_saved_count++] =
-				 (struct microcode_intel *)ucode_ptr;
-
-	*mc_saved_count_p = mc_saved_count;
-}
-
-/*
- * Get microcode matching with BSP's model. Only CPUs with the same model as
- * BSP can stay in the platform.
- */
-static enum ucode_state __init
-get_matching_model_microcode(int cpu, unsigned long start,
-			     void *data, size_t size,
-			     struct mc_saved_data *mc_saved_data,
-			     unsigned long *mc_saved_in_initrd,
-			     struct ucode_cpu_info *uci)
-{
-	u8 *ucode_ptr = data;
-	unsigned int leftover = size;
-	enum ucode_state state = UCODE_OK;
-	unsigned int mc_size;
-	struct microcode_header_intel *mc_header;
-	struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
-	unsigned int mc_saved_count = mc_saved_data->mc_saved_count;
-	int i;
-
-	while (leftover) {
-		mc_header = (struct microcode_header_intel *)ucode_ptr;
-
-		mc_size = get_totalsize(mc_header);
-		if (!mc_size || mc_size > leftover ||
-			microcode_sanity_check(ucode_ptr, 0) < 0)
-			break;
-
-		leftover -= mc_size;
-
-		/*
-		 * Since APs with same family and model as the BSP may boot in
-		 * the platform, we need to find and save microcode patches
-		 * with the same family and model as the BSP.
-		 */
-		if (matching_model_microcode(mc_header, uci->cpu_sig.sig) !=
-			 UCODE_OK) {
-			ucode_ptr += mc_size;
-			continue;
-		}
-
-		_save_mc(mc_saved_tmp, ucode_ptr, &mc_saved_count);
-
-		ucode_ptr += mc_size;
-	}
-
-	if (leftover) {
-		state = UCODE_ERROR;
-		goto out;
-	}
-
-	if (mc_saved_count == 0) {
-		state = UCODE_NFOUND;
-		goto out;
-	}
-
-	for (i = 0; i < mc_saved_count; i++)
-		mc_saved_in_initrd[i] = (unsigned long)mc_saved_tmp[i] - start;
-
-	mc_saved_data->mc_saved_count = mc_saved_count;
-out:
-	return state;
-}
-
-static int collect_cpu_info_early(struct ucode_cpu_info *uci)
-{
-	unsigned int val[2];
-	u8 x86, x86_model;
-	struct cpu_signature csig;
-	unsigned int eax, ebx, ecx, edx;
-
-	csig.sig = 0;
-	csig.pf = 0;
-	csig.rev = 0;
-
-	memset(uci, 0, sizeof(*uci));
-
-	eax = 0x00000001;
-	ecx = 0;
-	native_cpuid(&eax, &ebx, &ecx, &edx);
-	csig.sig = eax;
-
-	x86 = get_x86_family(csig.sig);
-	x86_model = get_x86_model(csig.sig);
-
-	if ((x86_model >= 5) || (x86 > 6)) {
-		/* get processor flags from MSR 0x17 */
-		native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
-		csig.pf = 1 << ((val[1] >> 18) & 7);
-	}
-	native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
-
-	/* As documented in the SDM: Do a CPUID 1 here */
-	sync_core();
-
-	/* get the current revision from MSR 0x8B */
-	native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
-
-	csig.rev = val[1];
-
-	uci->cpu_sig = csig;
-	uci->valid = 1;
-
-	return 0;
-}
-
-#ifdef DEBUG
-static void __ref show_saved_mc(void)
-{
-	int i, j;
-	unsigned int sig, pf, rev, total_size, data_size, date;
-	struct ucode_cpu_info uci;
-
-	if (mc_saved_data.mc_saved_count == 0) {
-		pr_debug("no micorcode data saved.\n");
-		return;
-	}
-	pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count);
-
-	collect_cpu_info_early(&uci);
-
-	sig = uci.cpu_sig.sig;
-	pf = uci.cpu_sig.pf;
-	rev = uci.cpu_sig.rev;
-	pr_debug("CPU%d: sig=0x%x, pf=0x%x, rev=0x%x\n",
-		 smp_processor_id(), sig, pf, rev);
-
-	for (i = 0; i < mc_saved_data.mc_saved_count; i++) {
-		struct microcode_header_intel *mc_saved_header;
-		struct extended_sigtable *ext_header;
-		int ext_sigcount;
-		struct extended_signature *ext_sig;
-
-		mc_saved_header = (struct microcode_header_intel *)
-				  mc_saved_data.mc_saved[i];
-		sig = mc_saved_header->sig;
-		pf = mc_saved_header->pf;
-		rev = mc_saved_header->rev;
-		total_size = get_totalsize(mc_saved_header);
-		data_size = get_datasize(mc_saved_header);
-		date = mc_saved_header->date;
-
-		pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, toal size=0x%x, date = %04x-%02x-%02x\n",
-			 i, sig, pf, rev, total_size,
-			 date & 0xffff,
-			 date >> 24,
-			 (date >> 16) & 0xff);
-
-		/* Look for ext. headers: */
-		if (total_size <= data_size + MC_HEADER_SIZE)
-			continue;
-
-		ext_header = (struct extended_sigtable *)
-			     mc_saved_header + data_size + MC_HEADER_SIZE;
-		ext_sigcount = ext_header->count;
-		ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
-
-		for (j = 0; j < ext_sigcount; j++) {
-			sig = ext_sig->sig;
-			pf = ext_sig->pf;
-
-			pr_debug("\tExtended[%d]: sig=0x%x, pf=0x%x\n",
-				 j, sig, pf);
-
-			ext_sig++;
-		}
-
-	}
-}
-#else
-static inline void show_saved_mc(void)
-{
-}
-#endif
-
-#if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU)
-static DEFINE_MUTEX(x86_cpu_microcode_mutex);
-/*
- * Save this mc into mc_saved_data. So it will be loaded early when a CPU is
- * hot added or resumes.
- *
- * Please make sure this mc should be a valid microcode patch before calling
- * this function.
- */
-int save_mc_for_early(u8 *mc)
-{
-	struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
-	unsigned int mc_saved_count_init;
-	unsigned int mc_saved_count;
-	struct microcode_intel **mc_saved;
-	int ret = 0;
-	int i;
-
-	/*
-	 * Hold hotplug lock so mc_saved_data is not accessed by a CPU in
-	 * hotplug.
-	 */
-	mutex_lock(&x86_cpu_microcode_mutex);
-
-	mc_saved_count_init = mc_saved_data.mc_saved_count;
-	mc_saved_count = mc_saved_data.mc_saved_count;
-	mc_saved = mc_saved_data.mc_saved;
-
-	if (mc_saved && mc_saved_count)
-		memcpy(mc_saved_tmp, mc_saved,
-		       mc_saved_count * sizeof(struct mirocode_intel *));
-	/*
-	 * Save the microcode patch mc in mc_save_tmp structure if it's a newer
-	 * version.
-	 */
-
-	_save_mc(mc_saved_tmp, mc, &mc_saved_count);
-
-	/*
-	 * Save the mc_save_tmp in global mc_saved_data.
-	 */
-	ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count);
-	if (ret) {
-		pr_err("Cannot save microcode patch.\n");
-		goto out;
-	}
-
-	show_saved_mc();
-
-	/*
-	 * Free old saved microcod data.
-	 */
-	if (mc_saved) {
-		for (i = 0; i < mc_saved_count_init; i++)
-			kfree(mc_saved[i]);
-		kfree(mc_saved);
-	}
-
-out:
-	mutex_unlock(&x86_cpu_microcode_mutex);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(save_mc_for_early);
-#endif
-
-static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin";
-static __init enum ucode_state
-scan_microcode(unsigned long start, unsigned long end,
-		struct mc_saved_data *mc_saved_data,
-		unsigned long *mc_saved_in_initrd,
-		struct ucode_cpu_info *uci)
-{
-	unsigned int size = end - start + 1;
-	struct cpio_data cd;
-	long offset = 0;
-#ifdef CONFIG_X86_32
-	char *p = (char *)__pa_nodebug(ucode_name);
-#else
-	char *p = ucode_name;
-#endif
-
-	cd.data = NULL;
-	cd.size = 0;
-
-	cd = find_cpio_data(p, (void *)start, size, &offset);
-	if (!cd.data)
-		return UCODE_ERROR;
-
-
-	return get_matching_model_microcode(0, start, cd.data, cd.size,
-					    mc_saved_data, mc_saved_in_initrd,
-					    uci);
-}
-
-/*
- * Print ucode update info.
- */
-static void
-print_ucode_info(struct ucode_cpu_info *uci, unsigned int date)
-{
-	int cpu = smp_processor_id();
-
-	pr_info("CPU%d microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n",
-		cpu,
-		uci->cpu_sig.rev,
-		date & 0xffff,
-		date >> 24,
-		(date >> 16) & 0xff);
-}
-
-#ifdef CONFIG_X86_32
-
-static int delay_ucode_info;
-static int current_mc_date;
-
-/*
- * Print early updated ucode info after printk works. This is delayed info dump.
- */
-void show_ucode_info_early(void)
-{
-	struct ucode_cpu_info uci;
-
-	if (delay_ucode_info) {
-		collect_cpu_info_early(&uci);
-		print_ucode_info(&uci, current_mc_date);
-		delay_ucode_info = 0;
-	}
-}
-
-/*
- * At this point, we can not call printk() yet. Keep microcode patch number in
- * mc_saved_data.mc_saved and delay printing microcode info in
- * show_ucode_info_early() until printk() works.
- */
-static void print_ucode(struct ucode_cpu_info *uci)
-{
-	struct microcode_intel *mc_intel;
-	int *delay_ucode_info_p;
-	int *current_mc_date_p;
-
-	mc_intel = uci->mc;
-	if (mc_intel == NULL)
-		return;
-
-	delay_ucode_info_p = (int *)__pa_nodebug(&delay_ucode_info);
-	current_mc_date_p = (int *)__pa_nodebug(&current_mc_date);
-
-	*delay_ucode_info_p = 1;
-	*current_mc_date_p = mc_intel->hdr.date;
-}
-#else
-
-/*
- * Flush global tlb. We only do this in x86_64 where paging has been enabled
- * already and PGE should be enabled as well.
- */
-static inline void flush_tlb_early(void)
-{
-	__native_flush_tlb_global_irq_disabled();
-}
-
-static inline void print_ucode(struct ucode_cpu_info *uci)
-{
-	struct microcode_intel *mc_intel;
-
-	mc_intel = uci->mc;
-	if (mc_intel == NULL)
-		return;
-
-	print_ucode_info(uci, mc_intel->hdr.date);
-}
-#endif
-
-static int apply_microcode_early(struct mc_saved_data *mc_saved_data,
-				 struct ucode_cpu_info *uci)
-{
-	struct microcode_intel *mc_intel;
-	unsigned int val[2];
-
-	mc_intel = uci->mc;
-	if (mc_intel == NULL)
-		return 0;
-
-	/* write microcode via MSR 0x79 */
-	native_wrmsr(MSR_IA32_UCODE_WRITE,
-	      (unsigned long) mc_intel->bits,
-	      (unsigned long) mc_intel->bits >> 16 >> 16);
-	native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
-
-	/* As documented in the SDM: Do a CPUID 1 here */
-	sync_core();
-
-	/* get the current revision from MSR 0x8B */
-	native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
-	if (val[1] != mc_intel->hdr.rev)
-		return -1;
-
-#ifdef CONFIG_X86_64
-	/* Flush global tlb. This is precaution. */
-	flush_tlb_early();
-#endif
-	uci->cpu_sig.rev = val[1];
-
-	print_ucode(uci);
-
-	return 0;
-}
-
-/*
- * This function converts microcode patch offsets previously stored in
- * mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data.
- */
-int __init save_microcode_in_initrd_intel(void)
-{
-	unsigned int count = mc_saved_data.mc_saved_count;
-	struct microcode_intel *mc_saved[MAX_UCODE_COUNT];
-	int ret = 0;
-
-	if (count == 0)
-		return ret;
-
-	microcode_pointer(mc_saved, mc_saved_in_initrd, initrd_start, count);
-	ret = save_microcode(&mc_saved_data, mc_saved, count);
-	if (ret)
-		pr_err("Cannot save microcode patches from initrd.\n");
-
-	show_saved_mc();
-
-	return ret;
-}
-
-static void __init
-_load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data,
-		      unsigned long *mc_saved_in_initrd,
-		      unsigned long initrd_start_early,
-		      unsigned long initrd_end_early,
-		      struct ucode_cpu_info *uci)
-{
-	collect_cpu_info_early(uci);
-	scan_microcode(initrd_start_early, initrd_end_early, mc_saved_data,
-		       mc_saved_in_initrd, uci);
-	load_microcode(mc_saved_data, mc_saved_in_initrd,
-		       initrd_start_early, uci);
-	apply_microcode_early(mc_saved_data, uci);
-}
-
-void __init
-load_ucode_intel_bsp(void)
-{
-	u64 ramdisk_image, ramdisk_size;
-	unsigned long initrd_start_early, initrd_end_early;
-	struct ucode_cpu_info uci;
-#ifdef CONFIG_X86_32
-	struct boot_params *boot_params_p;
-
-	boot_params_p = (struct boot_params *)__pa_nodebug(&boot_params);
-	ramdisk_image = boot_params_p->hdr.ramdisk_image;
-	ramdisk_size  = boot_params_p->hdr.ramdisk_size;
-	initrd_start_early = ramdisk_image;
-	initrd_end_early = initrd_start_early + ramdisk_size;
-
-	_load_ucode_intel_bsp(
-		(struct mc_saved_data *)__pa_nodebug(&mc_saved_data),
-		(unsigned long *)__pa_nodebug(&mc_saved_in_initrd),
-		initrd_start_early, initrd_end_early, &uci);
-#else
-	ramdisk_image = boot_params.hdr.ramdisk_image;
-	ramdisk_size  = boot_params.hdr.ramdisk_size;
-	initrd_start_early = ramdisk_image + PAGE_OFFSET;
-	initrd_end_early = initrd_start_early + ramdisk_size;
-
-	_load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd,
-			      initrd_start_early, initrd_end_early, &uci);
-#endif
-}
-
-void load_ucode_intel_ap(void)
-{
-	struct mc_saved_data *mc_saved_data_p;
-	struct ucode_cpu_info uci;
-	unsigned long *mc_saved_in_initrd_p;
-	unsigned long initrd_start_addr;
-#ifdef CONFIG_X86_32
-	unsigned long *initrd_start_p;
-
-	mc_saved_in_initrd_p =
-		(unsigned long *)__pa_nodebug(mc_saved_in_initrd);
-	mc_saved_data_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data);
-	initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start);
-	initrd_start_addr = (unsigned long)__pa_nodebug(*initrd_start_p);
-#else
-	mc_saved_data_p = &mc_saved_data;
-	mc_saved_in_initrd_p = mc_saved_in_initrd;
-	initrd_start_addr = initrd_start;
-#endif
-
-	/*
-	 * If there is no valid ucode previously saved in memory, no need to
-	 * update ucode on this AP.
-	 */
-	if (mc_saved_data_p->mc_saved_count == 0)
-		return;
-
-	collect_cpu_info_early(&uci);
-	load_microcode(mc_saved_data_p, mc_saved_in_initrd_p,
-		       initrd_start_addr, &uci);
-	apply_microcode_early(mc_saved_data_p, &uci);
-}
diff --git a/arch/x86/kernel/microcode_intel_lib.c b/arch/x86/kernel/microcode_intel_lib.c
deleted file mode 100644
index ce69320d0179..000000000000
--- a/arch/x86/kernel/microcode_intel_lib.c
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- *	Intel CPU Microcode Update Driver for Linux
- *
- *	Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
- *			   H Peter Anvin" <hpa@zytor.com>
- *
- *	This driver allows to upgrade microcode on Intel processors
- *	belonging to IA-32 family - PentiumPro, Pentium II,
- *	Pentium III, Xeon, Pentium 4, etc.
- *
- *	Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture
- *	Software Developer's Manual
- *	Order Number 253668 or free download from:
- *
- *	http://developer.intel.com/Assets/PDF/manual/253668.pdf
- *
- *	For more information, go to http://www.urbanmyth.org/microcode
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
- */
-#include <linux/firmware.h>
-#include <linux/uaccess.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-#include <asm/microcode_intel.h>
-#include <asm/processor.h>
-#include <asm/msr.h>
-
-static inline int
-update_match_cpu(unsigned int csig, unsigned int cpf,
-		 unsigned int sig, unsigned int pf)
-{
-	return (!sigmatch(sig, csig, pf, cpf)) ? 0 : 1;
-}
-
-int
-update_match_revision(struct microcode_header_intel *mc_header, int rev)
-{
-	return (mc_header->rev <= rev) ? 0 : 1;
-}
-
-int microcode_sanity_check(void *mc, int print_err)
-{
-	unsigned long total_size, data_size, ext_table_size;
-	struct microcode_header_intel *mc_header = mc;
-	struct extended_sigtable *ext_header = NULL;
-	int sum, orig_sum, ext_sigcount = 0, i;
-	struct extended_signature *ext_sig;
-
-	total_size = get_totalsize(mc_header);
-	data_size = get_datasize(mc_header);
-
-	if (data_size + MC_HEADER_SIZE > total_size) {
-		if (print_err)
-			pr_err("error! Bad data size in microcode data file\n");
-		return -EINVAL;
-	}
-
-	if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
-		if (print_err)
-			pr_err("error! Unknown microcode update format\n");
-		return -EINVAL;
-	}
-	ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
-	if (ext_table_size) {
-		if ((ext_table_size < EXT_HEADER_SIZE)
-		 || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
-			if (print_err)
-				pr_err("error! Small exttable size in microcode data file\n");
-			return -EINVAL;
-		}
-		ext_header = mc + MC_HEADER_SIZE + data_size;
-		if (ext_table_size != exttable_size(ext_header)) {
-			if (print_err)
-				pr_err("error! Bad exttable size in microcode data file\n");
-			return -EFAULT;
-		}
-		ext_sigcount = ext_header->count;
-	}
-
-	/* check extended table checksum */
-	if (ext_table_size) {
-		int ext_table_sum = 0;
-		int *ext_tablep = (int *)ext_header;
-
-		i = ext_table_size / DWSIZE;
-		while (i--)
-			ext_table_sum += ext_tablep[i];
-		if (ext_table_sum) {
-			if (print_err)
-				pr_warn("aborting, bad extended signature table checksum\n");
-			return -EINVAL;
-		}
-	}
-
-	/* calculate the checksum */
-	orig_sum = 0;
-	i = (MC_HEADER_SIZE + data_size) / DWSIZE;
-	while (i--)
-		orig_sum += ((int *)mc)[i];
-	if (orig_sum) {
-		if (print_err)
-			pr_err("aborting, bad checksum\n");
-		return -EINVAL;
-	}
-	if (!ext_table_size)
-		return 0;
-	/* check extended signature checksum */
-	for (i = 0; i < ext_sigcount; i++) {
-		ext_sig = (void *)ext_header + EXT_HEADER_SIZE +
-			  EXT_SIGNATURE_SIZE * i;
-		sum = orig_sum
-			- (mc_header->sig + mc_header->pf + mc_header->cksum)
-			+ (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
-		if (sum) {
-			if (print_err)
-				pr_err("aborting, bad checksum\n");
-			return -EINVAL;
-		}
-	}
-	return 0;
-}
-EXPORT_SYMBOL_GPL(microcode_sanity_check);
-
-/*
- * return 0 - no update found
- * return 1 - found update
- */
-int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev)
-{
-	struct microcode_header_intel *mc_header = mc;
-	struct extended_sigtable *ext_header;
-	unsigned long total_size = get_totalsize(mc_header);
-	int ext_sigcount, i;
-	struct extended_signature *ext_sig;
-
-	if (update_match_cpu(csig, cpf, mc_header->sig, mc_header->pf))
-		return 1;
-
-	/* Look for ext. headers: */
-	if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE)
-		return 0;
-
-	ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE;
-	ext_sigcount = ext_header->count;
-	ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
-
-	for (i = 0; i < ext_sigcount; i++) {
-		if (update_match_cpu(csig, cpf, ext_sig->sig, ext_sig->pf))
-			return 1;
-		ext_sig++;
-	}
-	return 0;
-}
-
-/*
- * return 0 - no update found
- * return 1 - found update
- */
-int get_matching_microcode(unsigned int csig, int cpf, void *mc, int rev)
-{
-	struct microcode_header_intel *mc_header = mc;
-
-	if (!update_match_revision(mc_header, rev))
-		return 0;
-
-	return get_matching_sig(csig, cpf, mc, rev);
-}
-EXPORT_SYMBOL_GPL(get_matching_microcode);
-- 
cgit v1.2.3


From f549ed1abc7e4f0292ce08c4143c64a610c8b2cb Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Tue, 14 Jan 2014 16:36:10 +1100
Subject: arch: Re-sort some Kbuild files to hopefully help avoid some
 conflicts

Checkin:

    93ea02bb8435 arch: Clean up asm/barrier.h implementations using asm-generic/barrier.h

... unfortunately left some Kbuild files out of order, which caused
unnecessary merge conflicts, in particular with checkin:

    e3fec2f74f7f lib: Add missing arch generic-y entries for asm-generic/hash.h

Put them back in order to make the upcoming merges cleaner.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Link: http://lkml.kernel.org/r/20140114164420.d296fbcc4be3a5f126c86069@canb.auug.org.au
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: David Miller <davem@davemloft.net>
---
 arch/arc/include/asm/Kbuild        | 2 +-
 arch/cris/include/asm/Kbuild       | 2 +-
 arch/hexagon/include/asm/Kbuild    | 2 +-
 arch/microblaze/include/asm/Kbuild | 2 +-
 arch/mn10300/include/asm/Kbuild    | 2 +-
 arch/parisc/include/asm/Kbuild     | 2 +-
 arch/score/include/asm/Kbuild      | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index e07c786011af..9ae21c198007 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -1,4 +1,5 @@
 generic-y += auxvec.h
+generic-y += barrier.h
 generic-y += bugs.h
 generic-y += bitsperlong.h
 generic-y += clkdev.h
@@ -47,4 +48,3 @@ generic-y += user.h
 generic-y += vga.h
 generic-y += xor.h
 generic-y += preempt.h
-generic-y += barrier.h
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index 35ec2e5ca832..199b1a9dab89 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -3,6 +3,7 @@ header-y += arch-v10/
 header-y += arch-v32/
 
 
+generic-y += barrier.h
 generic-y += clkdev.h
 generic-y += exec.h
 generic-y += kvm_para.h
@@ -12,4 +13,3 @@ generic-y += trace_clock.h
 generic-y += vga.h
 generic-y += xor.h
 generic-y += preempt.h
-generic-y += barrier.h
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index a614ec9747a6..ada843c701ef 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -2,6 +2,7 @@
 header-y += ucontext.h
 
 generic-y += auxvec.h
+generic-y += barrier.h
 generic-y += bug.h
 generic-y += bugs.h
 generic-y += clkdev.h
@@ -54,4 +55,3 @@ generic-y += ucontext.h
 generic-y += unaligned.h
 generic-y += xor.h
 generic-y += preempt.h
-generic-y += barrier.h
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index f77fb6630b11..a82426589fff 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -1,7 +1,7 @@
 
+generic-y += barrier.h
 generic-y += clkdev.h
 generic-y += exec.h
 generic-y += trace_clock.h
 generic-y += syscalls.h
 generic-y += preempt.h
-generic-y += barrier.h
diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild
index 367ef399ddf7..032143ec2324 100644
--- a/arch/mn10300/include/asm/Kbuild
+++ b/arch/mn10300/include/asm/Kbuild
@@ -1,6 +1,6 @@
 
+generic-y += barrier.h
 generic-y += clkdev.h
 generic-y += exec.h
 generic-y += trace_clock.h
 generic-y += preempt.h
-generic-y += barrier.h
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index 8df06d0074f4..34b0be4ca52d 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -1,8 +1,8 @@
 
+generic-y += barrier.h
 generic-y += word-at-a-time.h auxvec.h user.h cputime.h emergency-restart.h \
 	  segment.h topology.h vga.h device.h percpu.h hw_irq.h mutex.h \
 	  div64.h irq_regs.h kdebug.h kvm_para.h local64.h local.h param.h \
 	  poll.h xor.h clkdev.h exec.h
 generic-y += trace_clock.h
 generic-y += preempt.h
-generic-y += barrier.h
diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild
index ee2993b6e5d1..fe7471eb0167 100644
--- a/arch/score/include/asm/Kbuild
+++ b/arch/score/include/asm/Kbuild
@@ -1,8 +1,8 @@
 
 header-y +=
 
+generic-y += barrier.h
 generic-y += clkdev.h
 generic-y += trace_clock.h
 generic-y += xor.h
 generic-y += preempt.h
-generic-y += barrier.h
-- 
cgit v1.2.3


From 60283df7ac26a4fe2d56631ca2946e04725e7eaf Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Tue, 14 Jan 2014 08:44:47 +0100
Subject: x86/apic: Read Error Status Register correctly

Currently we do a read, a dummy write and a final read to fetch
the error code. The value from the final read is taken.
This is not the recommended way and leads to corrupted/lost ESR
values.

Intel(c) 64 and IA-32 Architectures Software Developer's Manual,
Combined Volumes 1, 2ABC, 3ABC, Section 10.5.3 states:

  Before attempt to read from the ESR, software should first
  write to it. (The value written does not affect the values read
  subsequently; only zero may be written in x2APIC mode.) This
  write clears any previously logged errors and updates the ESR
  with any errors detected since the last write to the ESR.
  This write also rearms the APIC error interrupt triggering
  mechanism.

This patch removes the first read such that we are conform with
the manual.

On my (very old) Pentium MMX SMP system this patch fixes the
issue that APIC errors:

  a) are not always reported and
  b) are reported with false error numbers.

Signed-off-by: Richard Weinberger <richard@nod.at>
Cc: seiji.aguchi@hds.com
Cc: rientjes@google.com
Cc: konrad.wilk@oracle.com
Cc: bp@alien8.de
Cc: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/1389685487-20872-1-git-send-email-richard@nod.at
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/apic/apic.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index d278736bf774..4ec1dd64022a 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1968,7 +1968,7 @@ __visible void smp_trace_spurious_interrupt(struct pt_regs *regs)
  */
 static inline void __smp_error_interrupt(struct pt_regs *regs)
 {
-	u32 v0, v1;
+	u32 v;
 	u32 i = 0;
 	static const char * const error_interrupt_reason[] = {
 		"Send CS error",		/* APIC Error Bit 0 */
@@ -1982,21 +1982,20 @@ static inline void __smp_error_interrupt(struct pt_regs *regs)
 	};
 
 	/* First tickle the hardware, only then report what went on. -- REW */
-	v0 = apic_read(APIC_ESR);
 	apic_write(APIC_ESR, 0);
-	v1 = apic_read(APIC_ESR);
+	v = apic_read(APIC_ESR);
 	ack_APIC_irq();
 	atomic_inc(&irq_err_count);
 
-	apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x(%02x)",
-		    smp_processor_id(), v0 , v1);
+	apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x",
+		    smp_processor_id(), v);
 
-	v1 = v1 & 0xff;
-	while (v1) {
-		if (v1 & 0x1)
+	v &= 0xff;
+	while (v) {
+		if (v & 0x1)
 			apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]);
 		i++;
-		v1 >>= 1;
+		v >>= 1;
 	}
 
 	apic_printk(APIC_DEBUG, KERN_CONT "\n");
-- 
cgit v1.2.3


From 1c59a861d6982edf3f9905ad2098575336ae904d Mon Sep 17 00:00:00 2001
From: Eugene Crosser <Eugene.Crosser@ru.ibm.com>
Date: Wed, 24 Apr 2013 12:00:23 +0200
Subject: s390/qdio: bridgeport support - CHSC part

Introduce function for the "Perform network-subchannel operation"
CHSC command with operation code "bridgeport information",
and bit definitions for "characteristics" pertaning to this command.

Signed-off-by: Eugene Crosser <eugene.crosser@ru.ibm.com>
Reviewed-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/css_chars.h |  2 +
 arch/s390/include/asm/qdio.h      | 33 ++++++++++++++
 drivers/s390/cio/chsc.c           | 33 ++++++++++++++
 drivers/s390/cio/chsc.h           | 51 +++++++++++++++++++++-
 drivers/s390/cio/qdio_main.c      | 91 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 209 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h
index 7e1c917bbba2..09d1dd46bd57 100644
--- a/arch/s390/include/asm/css_chars.h
+++ b/arch/s390/include/asm/css_chars.h
@@ -29,6 +29,8 @@ struct css_general_char {
 	u32 fcx : 1;	 /* bit 88 */
 	u32 : 19;
 	u32 alt_ssi : 1; /* bit 108 */
+	u32:1;
+	u32 narf:1;	 /* bit 110 */
 } __packed;
 
 extern struct css_general_char css_general_characteristics;
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 57d0d7e794b1..0a1abf1e69af 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -378,6 +378,34 @@ struct qdio_initialize {
 	struct qdio_outbuf_state *output_sbal_state_array;
 };
 
+/**
+ * enum qdio_brinfo_entry_type - type of address entry for qdio_brinfo_desc()
+ * @l3_ipv6_addr: entry contains IPv6 address
+ * @l3_ipv4_addr: entry contains IPv4 address
+ * @l2_addr_lnid: entry contains MAC address and VLAN ID
+ */
+enum qdio_brinfo_entry_type {l3_ipv6_addr, l3_ipv4_addr, l2_addr_lnid};
+
+/**
+ * struct qdio_brinfo_entry_XXX - Address entry for qdio_brinfo_desc()
+ * @nit:  Network interface token
+ * @addr: Address of one of the three types
+ *
+ * The struct is passed to the callback function by qdio_brinfo_desc()
+ */
+struct qdio_brinfo_entry_l3_ipv6 {
+	u64 nit;
+	struct { unsigned char _s6_addr[16]; } addr;
+} __packed;
+struct qdio_brinfo_entry_l3_ipv4 {
+	u64 nit;
+	struct { uint32_t _s_addr; } addr;
+} __packed;
+struct qdio_brinfo_entry_l2 {
+	u64 nit;
+	struct { u8 mac[6]; u16 lnid; } addr_lnid;
+} __packed;
+
 #define QDIO_STATE_INACTIVE		0x00000002 /* after qdio_cleanup */
 #define QDIO_STATE_ESTABLISHED		0x00000004 /* after qdio_establish */
 #define QDIO_STATE_ACTIVE		0x00000008 /* after qdio_activate */
@@ -399,5 +427,10 @@ extern int qdio_get_next_buffers(struct ccw_device *, int, int *, int *);
 extern int qdio_shutdown(struct ccw_device *, int);
 extern int qdio_free(struct ccw_device *);
 extern int qdio_get_ssqd_desc(struct ccw_device *, struct qdio_ssqd_desc *);
+extern int qdio_pnso_brinfo(struct subchannel_id schid,
+		int cnc, u16 *response,
+		void (*cb)(void *priv, enum qdio_brinfo_entry_type type,
+				void *entry),
+		void *priv);
 
 #endif /* __QDIO_H__ */
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index eee70cb8730b..f6b9188c5af5 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -55,6 +55,7 @@ int chsc_error_from_response(int response)
 	case 0x0004:
 		return -EOPNOTSUPP;
 	case 0x000b:
+	case 0x0107:		/* "Channel busy" for the op 0x003d */
 		return -EBUSY;
 	case 0x0100:
 	case 0x0102:
@@ -1202,3 +1203,35 @@ out:
 	return ret;
 }
 EXPORT_SYMBOL_GPL(chsc_scm_info);
+
+/**
+ * chsc_pnso_brinfo() - Perform Network-Subchannel Operation, Bridge Info.
+ * @schid:		id of the subchannel on which PNSO is performed
+ * @brinfo_area:	request and response block for the operation
+ * @resume_token:	resume token for multiblock response
+ * @cnc:		Boolean change-notification control
+ *
+ * brinfo_area must be allocated by the caller with get_zeroed_page(GFP_KERNEL)
+ *
+ * Returns 0 on success.
+ */
+int chsc_pnso_brinfo(struct subchannel_id schid,
+		struct chsc_pnso_area *brinfo_area,
+		struct chsc_brinfo_resume_token resume_token,
+		int cnc)
+{
+	memset(brinfo_area, 0, sizeof(*brinfo_area));
+	brinfo_area->request.length = 0x0030;
+	brinfo_area->request.code = 0x003d; /* network-subchannel operation */
+	brinfo_area->m	   = schid.m;
+	brinfo_area->ssid  = schid.ssid;
+	brinfo_area->sch   = schid.sch_no;
+	brinfo_area->cssid = schid.cssid;
+	brinfo_area->oc    = 0; /* Store-network-bridging-information list */
+	brinfo_area->resume_token = resume_token;
+	brinfo_area->n	   = (cnc != 0);
+	if (chsc(brinfo_area))
+		return -EIO;
+	return chsc_error_from_response(brinfo_area->response.code);
+}
+EXPORT_SYMBOL_GPL(chsc_pnso_brinfo);
diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h
index 23d072e70eb2..7e53a9c8b0b9 100644
--- a/drivers/s390/cio/chsc.h
+++ b/drivers/s390/cio/chsc.h
@@ -61,7 +61,9 @@ struct css_chsc_char {
 	u32 : 20;
 	u32 scssc : 1;  /* bit 107 */
 	u32 scsscf : 1; /* bit 108 */
-	u32 : 19;
+	u32:7;
+	u32 pnso:1; /* bit 116 */
+	u32:11;
 }__attribute__((packed));
 
 extern struct css_chsc_char css_chsc_characteristics;
@@ -188,6 +190,53 @@ struct chsc_scm_info {
 
 int chsc_scm_info(struct chsc_scm_info *scm_area, u64 token);
 
+struct chsc_brinfo_resume_token {
+	u64 t1;
+	u64 t2;
+} __packed;
+
+struct chsc_brinfo_naihdr {
+	struct chsc_brinfo_resume_token resume_token;
+	u32:32;
+	u32 instance;
+	u32:24;
+	u8 naids;
+	u32 reserved[3];
+} __packed;
+
+struct chsc_pnso_area {
+	struct chsc_header request;
+	u8:2;
+	u8 m:1;
+	u8:5;
+	u8:2;
+	u8 ssid:2;
+	u8 fmt:4;
+	u16 sch;
+	u8:8;
+	u8 cssid;
+	u16:16;
+	u8 oc;
+	u32:24;
+	struct chsc_brinfo_resume_token resume_token;
+	u32 n:1;
+	u32:31;
+	u32 reserved[3];
+	struct chsc_header response;
+	u32:32;
+	struct chsc_brinfo_naihdr naihdr;
+	union {
+		struct qdio_brinfo_entry_l3_ipv6 l3_ipv6[0];
+		struct qdio_brinfo_entry_l3_ipv4 l3_ipv4[0];
+		struct qdio_brinfo_entry_l2	 l2[0];
+	} entries;
+} __packed;
+
+int chsc_pnso_brinfo(struct subchannel_id schid,
+		struct chsc_pnso_area *brinfo_area,
+		struct chsc_brinfo_resume_token resume_token,
+		int cnc);
+
 #ifdef CONFIG_SCM_BUS
 int scm_update_information(void);
 int scm_process_availability_information(void);
diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index 3e602e8affa7..c883a085c059 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -1752,6 +1752,97 @@ int qdio_stop_irq(struct ccw_device *cdev, int nr)
 }
 EXPORT_SYMBOL(qdio_stop_irq);
 
+/**
+ * qdio_pnso_brinfo() - perform network subchannel op #0 - bridge info.
+ * @schid:		Subchannel ID.
+ * @cnc:		Boolean Change-Notification Control
+ * @response:		Response code will be stored at this address
+ * @cb: 		Callback function will be executed for each element
+ *			of the address list
+ * @priv:		Pointer passed from the caller to qdio_pnso_brinfo()
+ * @type:		Type of the address entry passed to the callback
+ * @entry:		Entry containg the address of the specified type
+ * @priv:		Pointer to pass to the callback function.
+ *
+ * Performs "Store-network-bridging-information list" operation and calls
+ * the callback function for every entry in the list. If "change-
+ * notification-control" is set, further changes in the address list
+ * will be reported via the IPA command.
+ */
+int qdio_pnso_brinfo(struct subchannel_id schid,
+		int cnc, u16 *response,
+		void (*cb)(void *priv, enum qdio_brinfo_entry_type type,
+				void *entry),
+		void *priv)
+{
+	struct chsc_pnso_area *rr;
+	int rc;
+	u32 prev_instance = 0;
+	int isfirstblock = 1;
+	int i, size, elems;
+
+	rr = (struct chsc_pnso_area *)get_zeroed_page(GFP_KERNEL);
+	if (rr == NULL)
+		return -ENOMEM;
+	do {
+		/* on the first iteration, naihdr.resume_token will be zero */
+		rc = chsc_pnso_brinfo(schid, rr, rr->naihdr.resume_token, cnc);
+		if (rc != 0 && rc != -EBUSY)
+			goto out;
+		if (rr->response.code != 1) {
+			rc = -EIO;
+			continue;
+		} else
+			rc = 0;
+
+		if (cb == NULL)
+			continue;
+
+		size = rr->naihdr.naids;
+		elems = (rr->response.length -
+				sizeof(struct chsc_header) -
+				sizeof(struct chsc_brinfo_naihdr)) /
+				size;
+
+		if (!isfirstblock && (rr->naihdr.instance != prev_instance)) {
+			/* Inform the caller that they need to scrap */
+			/* the data that was already reported via cb */
+				rc = -EAGAIN;
+				break;
+		}
+		isfirstblock = 0;
+		prev_instance = rr->naihdr.instance;
+		for (i = 0; i < elems; i++)
+			switch (size) {
+			case sizeof(struct qdio_brinfo_entry_l3_ipv6):
+				(*cb)(priv, l3_ipv6_addr,
+						&rr->entries.l3_ipv6[i]);
+				break;
+			case sizeof(struct qdio_brinfo_entry_l3_ipv4):
+				(*cb)(priv, l3_ipv4_addr,
+						&rr->entries.l3_ipv4[i]);
+				break;
+			case sizeof(struct qdio_brinfo_entry_l2):
+				(*cb)(priv, l2_addr_lnid,
+						&rr->entries.l2[i]);
+				break;
+			default:
+				WARN_ON_ONCE(1);
+				rc = -EIO;
+				goto out;
+			}
+	} while (rr->response.code == 0x0107 ||  /* channel busy */
+		  (rr->response.code == 1 && /* list stored */
+		   /* resume token is non-zero => list incomplete */
+		   (rr->naihdr.resume_token.t1 || rr->naihdr.resume_token.t2)));
+	(*response) = rr->response.code;
+
+out:
+	free_page((unsigned long)rr);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(qdio_pnso_brinfo);
+
 static int __init init_QDIO(void)
 {
 	int rc;
-- 
cgit v1.2.3


From 19259943f0954dcd1817f94776376bf51c6a46d5 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Date: Sat, 7 Dec 2013 21:02:36 +0800
Subject: x86, kaslr: Remove unused including <linux/version.h>

Remove including <linux/version.h> that don't need it.

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Link: http://lkml.kernel.org/r/CAPgLHd-Fjx1RybjWFAu1vHRfTvhWwMLL3x46BouC5uNxHPjy1A@mail.gmail.com
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/boot/compressed/aslr.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index 84be1752dcd8..90a21f430117 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -10,7 +10,6 @@
 #include <linux/uts.h>
 #include <linux/utsname.h>
 #include <generated/utsrelease.h>
-#include <linux/version.h>
 
 /* Simplified build-specific string for starting entropy. */
 static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
-- 
cgit v1.2.3


From da2b6fb990cf782b18952f534ec7323453bc4fc9 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 10 Dec 2013 12:27:45 -0800
Subject: x86, kaslr: Clarify RANDOMIZE_BASE_MAX_OFFSET

The help text for RANDOMIZE_BASE_MAX_OFFSET was confusing. This has been
clarified, and updated to be an export-only tunable.

Signed-off-by: Kees Cook <keescook@chromium.org>
Link: http://lkml.kernel.org/r/20131210202745.GA2961@www.outflux.net
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/Kconfig | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 596cd9edeb9c..5c9e19dccf2f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1747,26 +1747,33 @@ config RANDOMIZE_BASE
 	   possible. At best, due to page table layouts, 64-bit can use
 	   9 bits of entropy and 32-bit uses 8 bits.
 
+	   If unsure, say N.
+
 config RANDOMIZE_BASE_MAX_OFFSET
-	hex "Maximum ASLR offset allowed"
+	hex "Maximum kASLR offset allowed" if EXPERT
 	depends on RANDOMIZE_BASE
 	range 0x0 0x20000000 if X86_32
 	default "0x20000000" if X86_32
 	range 0x0 0x40000000 if X86_64
 	default "0x40000000" if X86_64
 	---help---
-	 Determines the maximal offset in bytes that will be applied to the
-	 kernel when Address Space Layout Randomization (ASLR) is active.
-	 Must be less than or equal to the actual physical memory on the
-	 system. This must be a multiple of CONFIG_PHYSICAL_ALIGN.
+	  The lesser of RANDOMIZE_BASE_MAX_OFFSET and available physical
+	  memory is used to determine the maximal offset in bytes that will
+	  be applied to the kernel when kernel Address Space Layout
+	  Randomization (kASLR) is active. This must be a multiple of
+	  PHYSICAL_ALIGN.
+
+	  On 32-bit this is limited to 512MiB by page table layouts. The
+	  default is 512MiB.
 
-	 On 32-bit this is limited to 512MiB.
+	  On 64-bit this is limited by how the kernel fixmap page table is
+	  positioned, so this cannot be larger than 1GiB currently. Without
+	  RANDOMIZE_BASE, there is a 512MiB to 1.5GiB split between kernel
+	  and modules. When RANDOMIZE_BASE_MAX_OFFSET is above 512MiB, the
+	  modules area will shrink to compensate, up to the current maximum
+	  1GiB to 1GiB split. The default is 1GiB.
 
-	 On 64-bit this is limited by how the kernel fixmap page table is
-	 positioned, so this cannot be larger that 1GiB currently. Normally
-	 there is a 512MiB to 1.5GiB split between kernel and modules. When
-	 this is raised above the 512MiB default, the modules area will
-	 shrink to compensate, up to the current maximum 1GiB to 1GiB split.
+	  If unsure, leave at the default value.
 
 # Relocation on x86 needs some additional build support
 config X86_NEED_RELOCS
-- 
cgit v1.2.3


From 3b56496865f9f7d9bcb2f93b44c63f274f08e3b6 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Wed, 15 Jan 2014 00:07:11 +0100
Subject: x86, cpu, amd: Add workaround for family 16h, erratum 793

This adds the workaround for erratum 793 as a precaution in case not
every BIOS implements it.  This addresses CVE-2013-6885.

Erratum text:

[Revision Guide for AMD Family 16h Models 00h-0Fh Processors,
document 51810 Rev. 3.04 November 2013]

793 Specific Combination of Writes to Write Combined Memory Types and
Locked Instructions May Cause Core Hang

Description

Under a highly specific and detailed set of internal timing
conditions, a locked instruction may trigger a timing sequence whereby
the write to a write combined memory type is not flushed, causing the
locked instruction to stall indefinitely.

Potential Effect on System

Processor core hang.

Suggested Workaround

BIOS should set MSR
C001_1020[15] = 1b.

Fix Planned

No fix planned

[ hpa: updated description, fixed typo in MSR name ]

Signed-off-by: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/20140114230711.GS29865@pd.tnic
Tested-by: Aravind Gopalakrishnan <aravind.gopalakrishnan@amd.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/uapi/asm/msr-index.h |  1 +
 arch/x86/kernel/cpu/amd.c             | 10 ++++++++++
 2 files changed, 11 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 37813b5ddc37..59cea185ad1d 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -184,6 +184,7 @@
 #define MSR_AMD64_PATCH_LOADER		0xc0010020
 #define MSR_AMD64_OSVW_ID_LENGTH	0xc0010140
 #define MSR_AMD64_OSVW_STATUS		0xc0010141
+#define MSR_AMD64_LS_CFG		0xc0011020
 #define MSR_AMD64_DC_CFG		0xc0011022
 #define MSR_AMD64_BU_CFG2		0xc001102a
 #define MSR_AMD64_IBSFETCHCTL		0xc0011030
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index bca023bdd6b2..59bfebc8c805 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -508,6 +508,16 @@ static void early_init_amd(struct cpuinfo_x86 *c)
 			set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
 	}
 #endif
+
+	/* F16h erratum 793, CVE-2013-6885 */
+	if (c->x86 == 0x16 && c->x86_model <= 0xf) {
+		u64 val;
+
+		rdmsrl(MSR_AMD64_LS_CFG, val);
+		if (!(val & BIT(15)))
+			wrmsrl(MSR_AMD64_LS_CFG, val | BIT(15));
+	}
+
 }
 
 static const int amd_erratum_383[];
-- 
cgit v1.2.3


From bad009fe354a00e6b2bf87328995ec76e59ab970 Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhc@lemote.com>
Date: Tue, 14 Jan 2014 17:56:37 -0800
Subject: MIPS: fix case mismatch in local_r4k_flush_icache_range()

Currently, Loongson-2 call protected_blast_icache_range() and others
call protected_loongson23_blast_icache_range(), but I think the correct
behavior should be the opposite.  BTW, Loongson-3's cache-ops is
compatible with MIPS64, but not compatible with Loongson-2.  So, rename
xxx_loongson23_yyy things to xxx_loongson2_yyy.

The patch fixes early boot hang with 3.13-rc1, introduced in commit
14bd8c082016 ("MIPS: Loongson: Get rid of Loongson 2 #ifdefery all over
arch/mips").

Signed-off-by: Huacai Chen <chenhc@lemote.com>
Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Acked-by: John Crispin <blogic@openwrt.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/mips/include/asm/cacheops.h | 2 +-
 arch/mips/include/asm/r4kcache.h | 8 ++++----
 arch/mips/mm/c-r4k.c             | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/include/asm/cacheops.h b/arch/mips/include/asm/cacheops.h
index c75025f27c20..06b9bc7ea14b 100644
--- a/arch/mips/include/asm/cacheops.h
+++ b/arch/mips/include/asm/cacheops.h
@@ -83,6 +83,6 @@
 /*
  * Loongson2-specific cacheops
  */
-#define Hit_Invalidate_I_Loongson23	0x00
+#define Hit_Invalidate_I_Loongson2	0x00
 
 #endif	/* __ASM_CACHEOPS_H */
diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h
index 34d1a1917125..91d20b08246f 100644
--- a/arch/mips/include/asm/r4kcache.h
+++ b/arch/mips/include/asm/r4kcache.h
@@ -165,7 +165,7 @@ static inline void flush_icache_line(unsigned long addr)
 	__iflush_prologue
 	switch (boot_cpu_type()) {
 	case CPU_LOONGSON2:
-		cache_op(Hit_Invalidate_I_Loongson23, addr);
+		cache_op(Hit_Invalidate_I_Loongson2, addr);
 		break;
 
 	default:
@@ -219,7 +219,7 @@ static inline void protected_flush_icache_line(unsigned long addr)
 {
 	switch (boot_cpu_type()) {
 	case CPU_LOONGSON2:
-		protected_cache_op(Hit_Invalidate_I_Loongson23, addr);
+		protected_cache_op(Hit_Invalidate_I_Loongson2, addr);
 		break;
 
 	default:
@@ -452,8 +452,8 @@ static inline void prot##extra##blast_##pfx##cache##_range(unsigned long start,
 __BUILD_BLAST_CACHE_RANGE(d, dcache, Hit_Writeback_Inv_D, protected_, )
 __BUILD_BLAST_CACHE_RANGE(s, scache, Hit_Writeback_Inv_SD, protected_, )
 __BUILD_BLAST_CACHE_RANGE(i, icache, Hit_Invalidate_I, protected_, )
-__BUILD_BLAST_CACHE_RANGE(i, icache, Hit_Invalidate_I_Loongson23, \
-	protected_, loongson23_)
+__BUILD_BLAST_CACHE_RANGE(i, icache, Hit_Invalidate_I_Loongson2, \
+	protected_, loongson2_)
 __BUILD_BLAST_CACHE_RANGE(d, dcache, Hit_Writeback_Inv_D, , )
 __BUILD_BLAST_CACHE_RANGE(s, scache, Hit_Writeback_Inv_SD, , )
 /* blast_inv_dcache_range */
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 62ffd20ea869..73f02da61baf 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -580,11 +580,11 @@ static inline void local_r4k_flush_icache_range(unsigned long start, unsigned lo
 	else {
 		switch (boot_cpu_type()) {
 		case CPU_LOONGSON2:
-			protected_blast_icache_range(start, end);
+			protected_loongson2_blast_icache_range(start, end);
 			break;
 
 		default:
-			protected_loongson23_blast_icache_range(start, end);
+			protected_blast_icache_range(start, end);
 			break;
 		}
 	}
-- 
cgit v1.2.3


From 43a06847b9d277e9f2c3bf8052b44b74e17526c7 Mon Sep 17 00:00:00 2001
From: Aaro Koskinen <aaro.koskinen@iki.fi>
Date: Tue, 14 Jan 2014 17:56:38 -0800
Subject: MIPS: fix blast_icache32 on loongson2

Commit 14bd8c082016 ("MIPS: Loongson: Get rid of Loongson 2 #ifdefery
all over arch/mips") failed to add Loongson2 specific blast_icache32
functions.  Fix that.

The patch fixes the following crash seen with 3.13-rc1:

  Reserved instruction in kernel code[#1]:
  [...]
  Call Trace:
    blast_icache32_page+0x8/0xb0
    r4k_flush_cache_page+0x19c/0x200
    do_wp_page.isra.97+0x47c/0xe08
    handle_mm_fault+0x938/0x1118
    __do_page_fault+0x140/0x540
    resume_userspace_check+0x0/0x10
  Code: 00200825  64834000  00200825 <bc900000> bc900020  bc900040  bc900060  bc900080  bc9000a0

Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Acked-by: John Crispin <blogic@openwrt.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/mips/include/asm/r4kcache.h | 43 ++++++++++++++++++++--------------------
 arch/mips/mm/c-r4k.c             |  7 +++++++
 2 files changed, 29 insertions(+), 21 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h
index 91d20b08246f..c84caddb8bde 100644
--- a/arch/mips/include/asm/r4kcache.h
+++ b/arch/mips/include/asm/r4kcache.h
@@ -357,8 +357,8 @@ static inline void invalidate_tcache_page(unsigned long addr)
 		  "i" (op));
 
 /* build blast_xxx, blast_xxx_page, blast_xxx_page_indexed */
-#define __BUILD_BLAST_CACHE(pfx, desc, indexop, hitop, lsize) \
-static inline void blast_##pfx##cache##lsize(void)			\
+#define __BUILD_BLAST_CACHE(pfx, desc, indexop, hitop, lsize, extra)	\
+static inline void extra##blast_##pfx##cache##lsize(void)		\
 {									\
 	unsigned long start = INDEX_BASE;				\
 	unsigned long end = start + current_cpu_data.desc.waysize;	\
@@ -376,7 +376,7 @@ static inline void blast_##pfx##cache##lsize(void)			\
 	__##pfx##flush_epilogue						\
 }									\
 									\
-static inline void blast_##pfx##cache##lsize##_page(unsigned long page) \
+static inline void extra##blast_##pfx##cache##lsize##_page(unsigned long page) \
 {									\
 	unsigned long start = page;					\
 	unsigned long end = page + PAGE_SIZE;				\
@@ -391,7 +391,7 @@ static inline void blast_##pfx##cache##lsize##_page(unsigned long page) \
 	__##pfx##flush_epilogue						\
 }									\
 									\
-static inline void blast_##pfx##cache##lsize##_page_indexed(unsigned long page) \
+static inline void extra##blast_##pfx##cache##lsize##_page_indexed(unsigned long page) \
 {									\
 	unsigned long indexmask = current_cpu_data.desc.waysize - 1;	\
 	unsigned long start = INDEX_BASE + (page & indexmask);		\
@@ -410,23 +410,24 @@ static inline void blast_##pfx##cache##lsize##_page_indexed(unsigned long page)
 	__##pfx##flush_epilogue						\
 }
 
-__BUILD_BLAST_CACHE(d, dcache, Index_Writeback_Inv_D, Hit_Writeback_Inv_D, 16)
-__BUILD_BLAST_CACHE(i, icache, Index_Invalidate_I, Hit_Invalidate_I, 16)
-__BUILD_BLAST_CACHE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 16)
-__BUILD_BLAST_CACHE(d, dcache, Index_Writeback_Inv_D, Hit_Writeback_Inv_D, 32)
-__BUILD_BLAST_CACHE(i, icache, Index_Invalidate_I, Hit_Invalidate_I, 32)
-__BUILD_BLAST_CACHE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 32)
-__BUILD_BLAST_CACHE(d, dcache, Index_Writeback_Inv_D, Hit_Writeback_Inv_D, 64)
-__BUILD_BLAST_CACHE(i, icache, Index_Invalidate_I, Hit_Invalidate_I, 64)
-__BUILD_BLAST_CACHE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 64)
-__BUILD_BLAST_CACHE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 128)
-
-__BUILD_BLAST_CACHE(inv_d, dcache, Index_Writeback_Inv_D, Hit_Invalidate_D, 16)
-__BUILD_BLAST_CACHE(inv_d, dcache, Index_Writeback_Inv_D, Hit_Invalidate_D, 32)
-__BUILD_BLAST_CACHE(inv_s, scache, Index_Writeback_Inv_SD, Hit_Invalidate_SD, 16)
-__BUILD_BLAST_CACHE(inv_s, scache, Index_Writeback_Inv_SD, Hit_Invalidate_SD, 32)
-__BUILD_BLAST_CACHE(inv_s, scache, Index_Writeback_Inv_SD, Hit_Invalidate_SD, 64)
-__BUILD_BLAST_CACHE(inv_s, scache, Index_Writeback_Inv_SD, Hit_Invalidate_SD, 128)
+__BUILD_BLAST_CACHE(d, dcache, Index_Writeback_Inv_D, Hit_Writeback_Inv_D, 16, )
+__BUILD_BLAST_CACHE(i, icache, Index_Invalidate_I, Hit_Invalidate_I, 16, )
+__BUILD_BLAST_CACHE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 16, )
+__BUILD_BLAST_CACHE(d, dcache, Index_Writeback_Inv_D, Hit_Writeback_Inv_D, 32, )
+__BUILD_BLAST_CACHE(i, icache, Index_Invalidate_I, Hit_Invalidate_I, 32, )
+__BUILD_BLAST_CACHE(i, icache, Index_Invalidate_I, Hit_Invalidate_I_Loongson2, 32, loongson2_)
+__BUILD_BLAST_CACHE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 32, )
+__BUILD_BLAST_CACHE(d, dcache, Index_Writeback_Inv_D, Hit_Writeback_Inv_D, 64, )
+__BUILD_BLAST_CACHE(i, icache, Index_Invalidate_I, Hit_Invalidate_I, 64, )
+__BUILD_BLAST_CACHE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 64, )
+__BUILD_BLAST_CACHE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 128, )
+
+__BUILD_BLAST_CACHE(inv_d, dcache, Index_Writeback_Inv_D, Hit_Invalidate_D, 16, )
+__BUILD_BLAST_CACHE(inv_d, dcache, Index_Writeback_Inv_D, Hit_Invalidate_D, 32, )
+__BUILD_BLAST_CACHE(inv_s, scache, Index_Writeback_Inv_SD, Hit_Invalidate_SD, 16, )
+__BUILD_BLAST_CACHE(inv_s, scache, Index_Writeback_Inv_SD, Hit_Invalidate_SD, 32, )
+__BUILD_BLAST_CACHE(inv_s, scache, Index_Writeback_Inv_SD, Hit_Invalidate_SD, 64, )
+__BUILD_BLAST_CACHE(inv_s, scache, Index_Writeback_Inv_SD, Hit_Invalidate_SD, 128, )
 
 /* build blast_xxx_range, protected_blast_xxx_range */
 #define __BUILD_BLAST_CACHE_RANGE(pfx, desc, hitop, prot, extra)	\
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 73f02da61baf..49e572d879e1 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -237,6 +237,8 @@ static void r4k_blast_icache_page_setup(void)
 		r4k_blast_icache_page = (void *)cache_noop;
 	else if (ic_lsize == 16)
 		r4k_blast_icache_page = blast_icache16_page;
+	else if (ic_lsize == 32 && current_cpu_type() == CPU_LOONGSON2)
+		r4k_blast_icache_page = loongson2_blast_icache32_page;
 	else if (ic_lsize == 32)
 		r4k_blast_icache_page = blast_icache32_page;
 	else if (ic_lsize == 64)
@@ -261,6 +263,9 @@ static void r4k_blast_icache_page_indexed_setup(void)
 		else if (TX49XX_ICACHE_INDEX_INV_WAR)
 			r4k_blast_icache_page_indexed =
 				tx49_blast_icache32_page_indexed;
+		else if (current_cpu_type() == CPU_LOONGSON2)
+			r4k_blast_icache_page_indexed =
+				loongson2_blast_icache32_page_indexed;
 		else
 			r4k_blast_icache_page_indexed =
 				blast_icache32_page_indexed;
@@ -284,6 +289,8 @@ static void r4k_blast_icache_setup(void)
 			r4k_blast_icache = blast_r4600_v1_icache32;
 		else if (TX49XX_ICACHE_INDEX_INV_WAR)
 			r4k_blast_icache = tx49_blast_icache32;
+		else if (current_cpu_type() == CPU_LOONGSON2)
+			r4k_blast_icache = loongson2_blast_icache32;
 		else
 			r4k_blast_icache = blast_icache32;
 	} else if (ic_lsize == 64)
-- 
cgit v1.2.3


From d139336700a5f3a560da235e4dfcd286773025d4 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Wed, 15 Jan 2014 12:52:15 +0100
Subject: x86, cpu, amd: Fix a shadowed variable situation

Having u32 and struct cpuinfo_x86 * by the same name is not very smart,
although it was ok in this case due to the limited scope of u32 c and it
being used only once in there.

Fix this.

Signed-off-by: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/1389786735-16751-1-git-send-email-bp@alien8.de
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/cpu/amd.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 39bc78dad377..e5647ab5fc23 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -789,14 +789,10 @@ static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
 	}
 
 	/* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
-	if (!((eax >> 16) & mask)) {
-		u32 a, b, c, d;
-
-		cpuid(0x80000005, &a, &b, &c, &d);
-		tlb_lld_2m[ENTRIES] = (a >> 16) & 0xff;
-	} else {
+	if (!((eax >> 16) & mask))
+		tlb_lld_2m[ENTRIES] = (cpuid_eax(0x80000005) >> 16) & 0xff;
+	else
 		tlb_lld_2m[ENTRIES] = (eax >> 16) & mask;
-	}
 
 	/* a 4M entry uses two 2M entries */
 	tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1;
-- 
cgit v1.2.3


From 0dce7cd67fd9055c4a2ff278f8af1431e646d346 Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Wed, 15 Jan 2014 13:39:59 +0100
Subject: kvm: x86: fix apic_base enable check

Commit e66d2ae7c67bd moved the assignment
vcpu->arch.apic_base = value above a condition with
(vcpu->arch.apic_base ^ value), causing that check
to always fail. Use old_value, vcpu->arch.apic_base's
old value, in the condition instead.

Signed-off-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/lapic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 1673940cf9c3..775702f649ca 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1355,7 +1355,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 	vcpu->arch.apic_base = value;
 
 	/* update jump label if enable bit changes */
-	if ((vcpu->arch.apic_base ^ value) & MSR_IA32_APICBASE_ENABLE) {
+	if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
 		if (value & MSR_IA32_APICBASE_ENABLE)
 			static_key_slow_dec_deferred(&apic_hw_disabled);
 		else
-- 
cgit v1.2.3


From 151e0c7de616310f95393d9306903900fcd8b277 Mon Sep 17 00:00:00 2001
From: HATAYAMA Daisuke <d.hatayama@jp.fujitsu.com>
Date: Wed, 15 Jan 2014 15:44:58 +0900
Subject: x86, apic, kexec: Add disable_cpu_apicid kernel parameter

Add disable_cpu_apicid kernel parameter. To use this kernel parameter,
specify an initial APIC ID of the corresponding CPU you want to
disable.

This is mostly used for the kdump 2nd kernel to disable BSP to wake up
multiple CPUs without causing system reset or hang due to sending INIT
from AP to BSP.

Kdump users first figure out initial APIC ID of the BSP, CPU0 in the
1st kernel, for example from /proc/cpuinfo and then set up this kernel
parameter for the 2nd kernel using the obtained APIC ID.

However, doing this procedure at each boot time manually is awkward,
which should be automatically done by user-land service scripts, for
example, kexec-tools on fedora/RHEL distributions.

This design is more flexible than disabling BSP in kernel boot time
automatically in that in kernel boot time we have no choice but
referring to ACPI/MP table to obtain initial APIC ID for BSP, meaning
that the method is not applicable to the systems without such BIOS
tables.

One assumption behind this design is that users get initial APIC ID of
the BSP in still healthy state and so BSP is uniquely kept in
CPU0. Thus, through the kernel parameter, only one initial APIC ID can
be specified.

In a comparison with disabled_cpu_apicid, we use read_apic_id(), not
boot_cpu_physical_apicid, because on some platforms, the variable is
modified to the apicid reported as BSP through MP table and this
function is executed with the temporarily modified
boot_cpu_physical_apicid. As a result, disabled_cpu_apicid kernel
parameter doesn't work well for apicids of APs.

Fixing the wrong handling of boot_cpu_physical_apicid requires some
reviews and tests beyond some platforms and it could take some
time. The fix here is a kind of workaround to focus on the main topic
of this patch.

Signed-off-by: HATAYAMA Daisuke <d.hatayama@jp.fujitsu.com>
Link: http://lkml.kernel.org/r/20140115064458.1545.38775.stgit@localhost6.localdomain6
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 Documentation/kernel-parameters.txt |  9 +++++++
 arch/x86/kernel/apic/apic.c         | 49 +++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+)

(limited to 'arch')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index b9e9bd854298..6fdbf8c968e6 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -774,6 +774,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	disable=	[IPV6]
 			See Documentation/networking/ipv6.txt.
 
+	disable_cpu_apicid= [X86,APIC,SMP]
+			Format: <int>
+			The number of initial APIC ID for the
+			corresponding CPU to be disabled at boot,
+			mostly used for the kdump 2nd kernel to
+			disable BSP to wake up multiple CPUs without
+			causing system reset or hang due to sending
+			INIT from AP to BSP.
+
 	disable_ddw     [PPC/PSERIES]
 			Disable Dynamic DMA Window support. Use this if
 			to workaround buggy firmware.
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 4ec1dd64022a..e78ab8c8ac2e 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -74,6 +74,13 @@ unsigned int max_physical_apicid;
  */
 physid_mask_t phys_cpu_present_map;
 
+/*
+ * Processor to be disabled specified by kernel parameter
+ * disable_cpu_apicid=<int>, mostly used for the kdump 2nd kernel to
+ * avoid undefined behaviour caused by sending INIT from AP to BSP.
+ */
+unsigned int disabled_cpu_apicid = BAD_APICID;
+
 /*
  * Map cpu index to physical APIC ID
  */
@@ -2113,6 +2120,39 @@ int generic_processor_info(int apicid, int version)
 	bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid,
 				phys_cpu_present_map);
 
+	/*
+	 * boot_cpu_physical_apicid is designed to have the apicid
+	 * returned by read_apic_id(), i.e, the apicid of the
+	 * currently booting-up processor. However, on some platforms,
+	 * it is temporarilly modified by the apicid reported as BSP
+	 * through MP table. Concretely:
+	 *
+	 * - arch/x86/kernel/mpparse.c: MP_processor_info()
+	 * - arch/x86/mm/amdtopology.c: amd_numa_init()
+	 * - arch/x86/platform/visws/visws_quirks.c: MP_processor_info()
+	 *
+	 * This function is executed with the modified
+	 * boot_cpu_physical_apicid. So, disabled_cpu_apicid kernel
+	 * parameter doesn't work to disable APs on kdump 2nd kernel.
+	 *
+	 * Since fixing handling of boot_cpu_physical_apicid requires
+	 * another discussion and tests on each platform, we leave it
+	 * for now and here we use read_apic_id() directly in this
+	 * function, generic_processor_info().
+	 */
+	if (disabled_cpu_apicid != BAD_APICID &&
+	    disabled_cpu_apicid != read_apic_id() &&
+	    disabled_cpu_apicid == apicid) {
+		int thiscpu = num_processors + disabled_cpus;
+
+		pr_warning("ACPI: Disabling requested cpu."
+			   " Processor %d/0x%x ignored.\n",
+			   thiscpu, apicid);
+
+		disabled_cpus++;
+		return -ENODEV;
+	}
+
 	/*
 	 * If boot cpu has not been detected yet, then only allow upto
 	 * nr_cpu_ids - 1 processors and keep one slot free for boot cpu
@@ -2591,3 +2631,12 @@ static int __init lapic_insert_resource(void)
  * that is using request_resource
  */
 late_initcall(lapic_insert_resource);
+
+static int __init apic_set_disabled_cpu_apicid(char *arg)
+{
+	if (!arg || !get_option(&arg, &disabled_cpu_apicid))
+		return -EINVAL;
+
+	return 0;
+}
+early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid);
-- 
cgit v1.2.3


From 5b4d1dbc24bb6fd7179ada0f47be34e27e64decb Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 15 Jan 2014 13:02:08 -0800
Subject: x86, apic: Make disabled_cpu_apicid static read_mostly, fix typos

Make disabled_cpu_apicid static and read_mostly, and fix a couple of
typos.

Reported-by: Ingo Molnar <mingo@kernel.org>
Link: http://lkml.kernel.org/r/20140115182511.GA22737@gmail.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: HATAYAMA Daisuke <d.hatayama@jp.fujitsu.com>
---
 arch/x86/kernel/apic/apic.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index e78ab8c8ac2e..7f26c9a70a9e 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -79,7 +79,7 @@ physid_mask_t phys_cpu_present_map;
  * disable_cpu_apicid=<int>, mostly used for the kdump 2nd kernel to
  * avoid undefined behaviour caused by sending INIT from AP to BSP.
  */
-unsigned int disabled_cpu_apicid = BAD_APICID;
+static unsigned int disabled_cpu_apicid __read_mostly = BAD_APICID;
 
 /*
  * Map cpu index to physical APIC ID
@@ -2124,7 +2124,7 @@ int generic_processor_info(int apicid, int version)
 	 * boot_cpu_physical_apicid is designed to have the apicid
 	 * returned by read_apic_id(), i.e, the apicid of the
 	 * currently booting-up processor. However, on some platforms,
-	 * it is temporarilly modified by the apicid reported as BSP
+	 * it is temporarily modified by the apicid reported as BSP
 	 * through MP table. Concretely:
 	 *
 	 * - arch/x86/kernel/mpparse.c: MP_processor_info()
@@ -2145,7 +2145,7 @@ int generic_processor_info(int apicid, int version)
 	    disabled_cpu_apicid == apicid) {
 		int thiscpu = num_processors + disabled_cpus;
 
-		pr_warning("ACPI: Disabling requested cpu."
+		pr_warning("APIC: Disabling requested cpu."
 			   " Processor %d/0x%x ignored.\n",
 			   thiscpu, apicid);
 
-- 
cgit v1.2.3


From ecd6910db979bc40ac19f0e71e027132fc906068 Mon Sep 17 00:00:00 2001
From: David Cohen <david.a.cohen@linux.intel.com>
Date: Mon, 16 Dec 2013 12:07:36 -0800
Subject: x86, intel-mid: Move Medfield code out of intel-mid.c core file

In order make the driver more portable and support other Intel MID
(Mobile Internet Device) platforms we need to move Medfield code from
intel-mid.c core to its own mfld.c file.

This patch contains no functional changes.

Signed-off-by: David Cohen <david.a.cohen@linux.intel.com>
Link: http://lkml.kernel.org/r/1387224459-25746-2-git-send-email-david.a.cohen@linux.intel.com
Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/platform/intel-mid/Makefile               |  4 +-
 arch/x86/platform/intel-mid/intel-mid.c            | 37 +---------------
 arch/x86/platform/intel-mid/intel_mid_weak_decls.h | 15 +++++++
 arch/x86/platform/intel-mid/mfld.c                 | 51 ++++++++++++++++++++++
 4 files changed, 70 insertions(+), 37 deletions(-)
 create mode 100644 arch/x86/platform/intel-mid/intel_mid_weak_decls.h
 create mode 100644 arch/x86/platform/intel-mid/mfld.c

(limited to 'arch')

diff --git a/arch/x86/platform/intel-mid/Makefile b/arch/x86/platform/intel-mid/Makefile
index 01cc29ea5ff7..78a14ba0e0db 100644
--- a/arch/x86/platform/intel-mid/Makefile
+++ b/arch/x86/platform/intel-mid/Makefile
@@ -1,6 +1,6 @@
-obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o
-obj-$(CONFIG_X86_INTEL_MID) += intel_mid_vrtc.o
+obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o mfld.o
 obj-$(CONFIG_EARLY_PRINTK_INTEL_MID) += early_printk_intel_mid.o
+
 # SFI specific code
 ifdef CONFIG_X86_INTEL_MID
 obj-$(CONFIG_SFI) += sfi.o device_libs/
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
index f90e290f689f..527d6d50643d 100644
--- a/arch/x86/platform/intel-mid/intel-mid.c
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -35,6 +35,8 @@
 #include <asm/apb_timer.h>
 #include <asm/reboot.h>
 
+#include "intel_mid_weak_decls.h"
+
 /*
  * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock,
  * cmdline option x86_intel_mid_timer can be used to override the configuration
@@ -61,46 +63,11 @@ enum intel_mid_timer_options intel_mid_timer_options;
 enum intel_mid_cpu_type __intel_mid_cpu_chip;
 EXPORT_SYMBOL_GPL(__intel_mid_cpu_chip);
 
-static void intel_mid_power_off(void)
-{
-}
-
 static void intel_mid_reboot(void)
 {
 	intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0);
 }
 
-static unsigned long __init intel_mid_calibrate_tsc(void)
-{
-	unsigned long fast_calibrate;
-	u32 lo, hi, ratio, fsb;
-
-	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
-	pr_debug("IA32 perf status is 0x%x, 0x%0x\n", lo, hi);
-	ratio = (hi >> 8) & 0x1f;
-	pr_debug("ratio is %d\n", ratio);
-	if (!ratio) {
-		pr_err("read a zero ratio, should be incorrect!\n");
-		pr_err("force tsc ratio to 16 ...\n");
-		ratio = 16;
-	}
-	rdmsr(MSR_FSB_FREQ, lo, hi);
-	if ((lo & 0x7) == 0x7)
-		fsb = PENWELL_FSB_FREQ_83SKU;
-	else
-		fsb = PENWELL_FSB_FREQ_100SKU;
-	fast_calibrate = ratio * fsb;
-	pr_debug("read penwell tsc %lu khz\n", fast_calibrate);
-	lapic_timer_frequency = fsb * 1000 / HZ;
-	/* mark tsc clocksource as reliable */
-	set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE);
-
-	if (fast_calibrate)
-		return fast_calibrate;
-
-	return 0;
-}
-
 static void __init intel_mid_time_init(void)
 {
 	sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
diff --git a/arch/x86/platform/intel-mid/intel_mid_weak_decls.h b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h
new file mode 100644
index 000000000000..519beb75ba4c
--- /dev/null
+++ b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h
@@ -0,0 +1,15 @@
+/*
+ * intel_mid_weak_decls.h: Weak declarations of intel-mid.c
+ *
+ * (C) Copyright 2013 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+
+/* __attribute__((weak)) makes these declarations overridable */
+extern void intel_mid_power_off(void) __attribute__((weak));
+extern unsigned long __init intel_mid_calibrate_tsc(void) __attribute__((weak));
diff --git a/arch/x86/platform/intel-mid/mfld.c b/arch/x86/platform/intel-mid/mfld.c
new file mode 100644
index 000000000000..c7ff83c4576d
--- /dev/null
+++ b/arch/x86/platform/intel-mid/mfld.c
@@ -0,0 +1,51 @@
+/*
+ * mfld.c: Intel Medfield platform setup code
+ *
+ * (C) Copyright 2013 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/init.h>
+
+#include <asm/apic.h>
+#include <asm/intel-mid.h>
+#include <asm/intel_mid_vrtc.h>
+
+void intel_mid_power_off(void)
+{
+}
+
+unsigned long __init intel_mid_calibrate_tsc(void)
+{
+	unsigned long fast_calibrate;
+	u32 lo, hi, ratio, fsb;
+
+	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+	pr_debug("IA32 perf status is 0x%x, 0x%0x\n", lo, hi);
+	ratio = (hi >> 8) & 0x1f;
+	pr_debug("ratio is %d\n", ratio);
+	if (!ratio) {
+		pr_err("read a zero ratio, should be incorrect!\n");
+		pr_err("force tsc ratio to 16 ...\n");
+		ratio = 16;
+	}
+	rdmsr(MSR_FSB_FREQ, lo, hi);
+	if ((lo & 0x7) == 0x7)
+		fsb = PENWELL_FSB_FREQ_83SKU;
+	else
+		fsb = PENWELL_FSB_FREQ_100SKU;
+	fast_calibrate = ratio * fsb;
+	pr_debug("read penwell tsc %lu khz\n", fast_calibrate);
+	lapic_timer_frequency = fsb * 1000 / HZ;
+	/* mark tsc clocksource as reliable */
+	set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE);
+
+	if (fast_calibrate)
+		return fast_calibrate;
+
+	return 0;
+}
-- 
cgit v1.2.3


From 85611e3febe78955a519f5f9eb47b941525c8c76 Mon Sep 17 00:00:00 2001
From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Date: Mon, 16 Dec 2013 12:07:37 -0800
Subject: x86, intel-mid: Add Clovertrail platform support

This patch adds Clovertrail support on intel-mid and makes it more
flexible to support other SoCs.

Signed-off-by: David Cohen <david.a.cohen@linux.intel.com>
Link: http://lkml.kernel.org/r/1387224459-25746-3-git-send-email-david.a.cohen@linux.intel.com
Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Signed-off-by: Fei Yang <fei.yang@intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/intel-mid.h                   | 46 +++++++++++++++++++++-
 arch/x86/platform/intel-mid/intel-mid.c            | 39 ++++++++++++++++--
 arch/x86/platform/intel-mid/intel_mid_weak_decls.h |  7 +++-
 arch/x86/platform/intel-mid/mfld.c                 | 32 +++++++++++++--
 4 files changed, 113 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h
index 459769d39263..f8a831431fe0 100644
--- a/arch/x86/include/asm/intel-mid.h
+++ b/arch/x86/include/asm/intel-mid.h
@@ -51,10 +51,39 @@ struct devs_id {
 enum intel_mid_cpu_type {
 	/* 1 was Moorestown */
 	INTEL_MID_CPU_CHIP_PENWELL = 2,
+	INTEL_MID_CPU_CHIP_CLOVERVIEW,
 };
 
 extern enum intel_mid_cpu_type __intel_mid_cpu_chip;
 
+/**
+ * struct intel_mid_ops - Interface between intel-mid & sub archs
+ * @arch_setup: arch_setup function to re-initialize platform
+ *             structures (x86_init, x86_platform_init)
+ *
+ * This structure can be extended if any new interface is required
+ * between intel-mid & its sub arch files.
+ */
+struct intel_mid_ops {
+	void (*arch_setup)(void);
+};
+
+/* Helper API's for INTEL_MID_OPS_INIT */
+#define DECLARE_INTEL_MID_OPS_INIT(cpuname, cpuid)	\
+				[cpuid] = get_##cpuname##_ops
+
+/* Maximum number of CPU ops */
+#define MAX_CPU_OPS(a) (sizeof(a)/sizeof(void *))
+
+/*
+ * For every new cpu addition, a weak get_<cpuname>_ops() function needs be
+ * declared in arch/x86/platform/intel_mid/intel_mid_weak_decls.h.
+ */
+#define INTEL_MID_OPS_INIT {\
+	DECLARE_INTEL_MID_OPS_INIT(penwell, INTEL_MID_CPU_CHIP_PENWELL), \
+	DECLARE_INTEL_MID_OPS_INIT(cloverview, INTEL_MID_CPU_CHIP_CLOVERVIEW), \
+};
+
 #ifdef CONFIG_X86_INTEL_MID
 
 static inline enum intel_mid_cpu_type intel_mid_identify_cpu(void)
@@ -86,8 +115,21 @@ extern enum intel_mid_timer_options intel_mid_timer_options;
  * Penwell uses spread spectrum clock, so the freq number is not exactly
  * the same as reported by MSR based on SDM.
  */
-#define PENWELL_FSB_FREQ_83SKU         83200
-#define PENWELL_FSB_FREQ_100SKU        99840
+#define FSB_FREQ_83SKU	83200
+#define FSB_FREQ_100SKU	99840
+#define FSB_FREQ_133SKU	133000
+
+#define FSB_FREQ_167SKU	167000
+#define FSB_FREQ_200SKU	200000
+#define FSB_FREQ_267SKU	267000
+#define FSB_FREQ_333SKU	333000
+#define FSB_FREQ_400SKU	400000
+
+/* Bus Select SoC Fuse value */
+#define BSEL_SOC_FUSE_MASK	0x7
+#define BSEL_SOC_FUSE_001	0x1 /* FSB 133MHz */
+#define BSEL_SOC_FUSE_101	0x5 /* FSB 100MHz */
+#define BSEL_SOC_FUSE_111	0x7 /* FSB 83MHz */
 
 #define SFI_MTMR_MAX_NUM 8
 #define SFI_MRTC_MAX	8
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
index 527d6d50643d..40955841bb32 100644
--- a/arch/x86/platform/intel-mid/intel-mid.c
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -60,14 +60,27 @@
 
 enum intel_mid_timer_options intel_mid_timer_options;
 
+/* intel_mid_ops to store sub arch ops */
+struct intel_mid_ops *intel_mid_ops;
+/* getter function for sub arch ops*/
+static void *(*get_intel_mid_ops[])(void) = INTEL_MID_OPS_INIT;
 enum intel_mid_cpu_type __intel_mid_cpu_chip;
 EXPORT_SYMBOL_GPL(__intel_mid_cpu_chip);
 
+static void intel_mid_power_off(void)
+{
+};
+
 static void intel_mid_reboot(void)
 {
 	intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0);
 }
 
+static unsigned long __init intel_mid_calibrate_tsc(void)
+{
+	return 0;
+}
+
 static void __init intel_mid_time_init(void)
 {
 	sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
@@ -92,13 +105,33 @@ static void __init intel_mid_time_init(void)
 
 static void intel_mid_arch_setup(void)
 {
-	if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27)
-		__intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_PENWELL;
-	else {
+	if (boot_cpu_data.x86 != 6) {
 		pr_err("Unknown Intel MID CPU (%d:%d), default to Penwell\n",
 			boot_cpu_data.x86, boot_cpu_data.x86_model);
 		__intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_PENWELL;
+		goto out;
+	}
+
+	switch (boot_cpu_data.x86_model) {
+	case 0x35:
+		__intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_CLOVERVIEW;
+		break;
+	case 0x27:
+	default:
+		__intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_PENWELL;
+		break;
 	}
+
+	if (__intel_mid_cpu_chip < MAX_CPU_OPS(get_intel_mid_ops))
+		intel_mid_ops = get_intel_mid_ops[__intel_mid_cpu_chip]();
+	else {
+		intel_mid_ops = get_intel_mid_ops[INTEL_MID_CPU_CHIP_PENWELL]();
+		pr_info("ARCH: Uknown SoC, assuming PENWELL!\n");
+	}
+
+out:
+	if (intel_mid_ops->arch_setup)
+		intel_mid_ops->arch_setup();
 }
 
 /* MID systems don't have i8042 controller */
diff --git a/arch/x86/platform/intel-mid/intel_mid_weak_decls.h b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h
index 519beb75ba4c..9ebce0447edf 100644
--- a/arch/x86/platform/intel-mid/intel_mid_weak_decls.h
+++ b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h
@@ -11,5 +11,8 @@
 
 
 /* __attribute__((weak)) makes these declarations overridable */
-extern void intel_mid_power_off(void) __attribute__((weak));
-extern unsigned long __init intel_mid_calibrate_tsc(void) __attribute__((weak));
+/* For every CPU addition a new get_<cpuname>_ops interface needs
+ * to be added.
+ */
+extern void * __cpuinit get_penwell_ops(void) __attribute__((weak));
+extern void * __cpuinit get_cloverview_ops(void) __attribute__((weak));
diff --git a/arch/x86/platform/intel-mid/mfld.c b/arch/x86/platform/intel-mid/mfld.c
index c7ff83c4576d..4f7884eebc14 100644
--- a/arch/x86/platform/intel-mid/mfld.c
+++ b/arch/x86/platform/intel-mid/mfld.c
@@ -15,11 +15,19 @@
 #include <asm/intel-mid.h>
 #include <asm/intel_mid_vrtc.h>
 
-void intel_mid_power_off(void)
+#include "intel_mid_weak_decls.h"
+
+static void penwell_arch_setup(void);
+/* penwell arch ops */
+static struct intel_mid_ops penwell_ops = {
+	.arch_setup = penwell_arch_setup,
+};
+
+static void mfld_power_off(void)
 {
 }
 
-unsigned long __init intel_mid_calibrate_tsc(void)
+static unsigned long __init mfld_calibrate_tsc(void)
 {
 	unsigned long fast_calibrate;
 	u32 lo, hi, ratio, fsb;
@@ -35,9 +43,9 @@ unsigned long __init intel_mid_calibrate_tsc(void)
 	}
 	rdmsr(MSR_FSB_FREQ, lo, hi);
 	if ((lo & 0x7) == 0x7)
-		fsb = PENWELL_FSB_FREQ_83SKU;
+		fsb = FSB_FREQ_83SKU;
 	else
-		fsb = PENWELL_FSB_FREQ_100SKU;
+		fsb = FSB_FREQ_100SKU;
 	fast_calibrate = ratio * fsb;
 	pr_debug("read penwell tsc %lu khz\n", fast_calibrate);
 	lapic_timer_frequency = fsb * 1000 / HZ;
@@ -49,3 +57,19 @@ unsigned long __init intel_mid_calibrate_tsc(void)
 
 	return 0;
 }
+
+static void __init penwell_arch_setup()
+{
+	x86_platform.calibrate_tsc = mfld_calibrate_tsc;
+	pm_power_off = mfld_power_off;
+}
+
+void * __cpuinit get_penwell_ops()
+{
+	return &penwell_ops;
+}
+
+void * __cpuinit get_cloverview_ops()
+{
+	return &penwell_ops;
+}
-- 
cgit v1.2.3


From bc20aa48bbb3068224a1c91f8332971fdb689fad Mon Sep 17 00:00:00 2001
From: David Cohen <david.a.cohen@linux.intel.com>
Date: Mon, 16 Dec 2013 12:07:38 -0800
Subject: x86, intel-mid: Add Merrifield platform support

This code was partially based on Mark Brown's previous work.

Signed-off-by: David Cohen <david.a.cohen@linux.intel.com>
Link: http://lkml.kernel.org/r/1387224459-25746-4-git-send-email-david.a.cohen@linux.intel.com
Signed-off-by: Fei Yang <fei.yang@intel.com>
Cc: Mark F. Brown <mark.f.brown@intel.com>
Cc: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/intel-mid.h                   |   2 +
 arch/x86/pci/intel_mid_pci.c                       |   6 +-
 arch/x86/platform/intel-mid/Makefile               |   2 +-
 arch/x86/platform/intel-mid/intel-mid.c            |   4 +
 arch/x86/platform/intel-mid/intel_mid_weak_decls.h |   1 +
 arch/x86/platform/intel-mid/mrfl.c                 | 103 +++++++++++++++++++++
 arch/x86/platform/intel-mid/sfi.c                  |  34 +++++--
 7 files changed, 144 insertions(+), 8 deletions(-)
 create mode 100644 arch/x86/platform/intel-mid/mrfl.c

(limited to 'arch')

diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h
index f8a831431fe0..e34e097b6f9d 100644
--- a/arch/x86/include/asm/intel-mid.h
+++ b/arch/x86/include/asm/intel-mid.h
@@ -52,6 +52,7 @@ enum intel_mid_cpu_type {
 	/* 1 was Moorestown */
 	INTEL_MID_CPU_CHIP_PENWELL = 2,
 	INTEL_MID_CPU_CHIP_CLOVERVIEW,
+	INTEL_MID_CPU_CHIP_TANGIER,
 };
 
 extern enum intel_mid_cpu_type __intel_mid_cpu_chip;
@@ -82,6 +83,7 @@ struct intel_mid_ops {
 #define INTEL_MID_OPS_INIT {\
 	DECLARE_INTEL_MID_OPS_INIT(penwell, INTEL_MID_CPU_CHIP_PENWELL), \
 	DECLARE_INTEL_MID_OPS_INIT(cloverview, INTEL_MID_CPU_CHIP_CLOVERVIEW), \
+	DECLARE_INTEL_MID_OPS_INIT(tangier, INTEL_MID_CPU_CHIP_TANGIER) \
 };
 
 #ifdef CONFIG_X86_INTEL_MID
diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
index 51384ca727ad..84b9d672843d 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid_pci.c
@@ -31,6 +31,7 @@
 #include <asm/pci_x86.h>
 #include <asm/hw_irq.h>
 #include <asm/io_apic.h>
+#include <asm/intel-mid.h>
 
 #define PCIE_CAP_OFFSET	0x100
 
@@ -219,7 +220,10 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)
 	irq_attr.ioapic = mp_find_ioapic(dev->irq);
 	irq_attr.ioapic_pin = dev->irq;
 	irq_attr.trigger = 1; /* level */
-	irq_attr.polarity = 1; /* active low */
+	if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER)
+		irq_attr.polarity = 0; /* active high */
+	else
+		irq_attr.polarity = 1; /* active low */
 	io_apic_set_pci_routing(&dev->dev, dev->irq, &irq_attr);
 
 	return 0;
diff --git a/arch/x86/platform/intel-mid/Makefile b/arch/x86/platform/intel-mid/Makefile
index 78a14ba0e0db..0a8ee703b9fa 100644
--- a/arch/x86/platform/intel-mid/Makefile
+++ b/arch/x86/platform/intel-mid/Makefile
@@ -1,4 +1,4 @@
-obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o mfld.o
+obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o mfld.o mrfl.o
 obj-$(CONFIG_EARLY_PRINTK_INTEL_MID) += early_printk_intel_mid.o
 
 # SFI specific code
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
index 40955841bb32..1bbedc4b0f88 100644
--- a/arch/x86/platform/intel-mid/intel-mid.c
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -116,6 +116,10 @@ static void intel_mid_arch_setup(void)
 	case 0x35:
 		__intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_CLOVERVIEW;
 		break;
+	case 0x3C:
+	case 0x4A:
+		__intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_TANGIER;
+		break;
 	case 0x27:
 	default:
 		__intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_PENWELL;
diff --git a/arch/x86/platform/intel-mid/intel_mid_weak_decls.h b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h
index 9ebce0447edf..a537ffc16299 100644
--- a/arch/x86/platform/intel-mid/intel_mid_weak_decls.h
+++ b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h
@@ -16,3 +16,4 @@
  */
 extern void * __cpuinit get_penwell_ops(void) __attribute__((weak));
 extern void * __cpuinit get_cloverview_ops(void) __attribute__((weak));
+extern void * __init get_tangier_ops(void) __attribute__((weak));
diff --git a/arch/x86/platform/intel-mid/mrfl.c b/arch/x86/platform/intel-mid/mrfl.c
new file mode 100644
index 000000000000..09d10159e7b7
--- /dev/null
+++ b/arch/x86/platform/intel-mid/mrfl.c
@@ -0,0 +1,103 @@
+/*
+ * mrfl.c: Intel Merrifield platform specific setup code
+ *
+ * (C) Copyright 2013 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/init.h>
+
+#include <asm/apic.h>
+#include <asm/intel-mid.h>
+
+#include "intel_mid_weak_decls.h"
+
+static unsigned long __init tangier_calibrate_tsc(void)
+{
+	unsigned long fast_calibrate;
+	u32 lo, hi, ratio, fsb, bus_freq;
+
+	/* *********************** */
+	/* Compute TSC:Ratio * FSB */
+	/* *********************** */
+
+	/* Compute Ratio */
+	rdmsr(MSR_PLATFORM_INFO, lo, hi);
+	pr_debug("IA32 PLATFORM_INFO is 0x%x : %x\n", hi, lo);
+
+	ratio = (lo >> 8) & 0xFF;
+	pr_debug("ratio is %d\n", ratio);
+	if (!ratio) {
+		pr_err("Read a zero ratio, force tsc ratio to 4 ...\n");
+		ratio = 4;
+	}
+
+	/* Compute FSB */
+	rdmsr(MSR_FSB_FREQ, lo, hi);
+	pr_debug("Actual FSB frequency detected by SOC 0x%x : %x\n",
+			hi, lo);
+
+	bus_freq = lo & 0x7;
+	pr_debug("bus_freq = 0x%x\n", bus_freq);
+
+	if (bus_freq == 0)
+		fsb = FSB_FREQ_100SKU;
+	else if (bus_freq == 1)
+		fsb = FSB_FREQ_100SKU;
+	else if (bus_freq == 2)
+		fsb = FSB_FREQ_133SKU;
+	else if (bus_freq == 3)
+		fsb = FSB_FREQ_167SKU;
+	else if (bus_freq == 4)
+		fsb = FSB_FREQ_83SKU;
+	else if (bus_freq == 5)
+		fsb = FSB_FREQ_400SKU;
+	else if (bus_freq == 6)
+		fsb = FSB_FREQ_267SKU;
+	else if (bus_freq == 7)
+		fsb = FSB_FREQ_333SKU;
+	else {
+		BUG();
+		pr_err("Invalid bus_freq! Setting to minimal value!\n");
+		fsb = FSB_FREQ_100SKU;
+	}
+
+	/* TSC = FSB Freq * Resolved HFM Ratio */
+	fast_calibrate = ratio * fsb;
+	pr_debug("calculate tangier tsc %lu KHz\n", fast_calibrate);
+
+	/* ************************************ */
+	/* Calculate Local APIC Timer Frequency */
+	/* ************************************ */
+	lapic_timer_frequency = (fsb * 1000) / HZ;
+
+	pr_debug("Setting lapic_timer_frequency = %d\n",
+			lapic_timer_frequency);
+
+	/* mark tsc clocksource as reliable */
+	set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE);
+
+	if (fast_calibrate)
+		return fast_calibrate;
+
+	return 0;
+}
+
+static void __init tangier_arch_setup(void)
+{
+	x86_platform.calibrate_tsc = tangier_calibrate_tsc;
+}
+
+/* tangier arch ops */
+static struct intel_mid_ops tangier_ops = {
+	.arch_setup = tangier_arch_setup,
+};
+
+void * __cpuinit get_tangier_ops()
+{
+	return &tangier_ops;
+}
diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c
index c84c1ca396bf..80a52288555c 100644
--- a/arch/x86/platform/intel-mid/sfi.c
+++ b/arch/x86/platform/intel-mid/sfi.c
@@ -443,13 +443,35 @@ static int __init sfi_parse_devs(struct sfi_table_header *table)
 			 * so we have to enable them one by one here
 			 */
 			ioapic = mp_find_ioapic(irq);
-			irq_attr.ioapic = ioapic;
-			irq_attr.ioapic_pin = irq;
-			irq_attr.trigger = 1;
-			irq_attr.polarity = 1;
-			io_apic_set_pci_routing(NULL, irq, &irq_attr);
-		} else
+			if (ioapic >= 0) {
+				irq_attr.ioapic = ioapic;
+				irq_attr.ioapic_pin = irq;
+				irq_attr.trigger = 1;
+				if (intel_mid_identify_cpu() ==
+						INTEL_MID_CPU_CHIP_TANGIER) {
+					if (!strncmp(pentry->name,
+							"r69001-ts-i2c", 13))
+						/* active low */
+						irq_attr.polarity = 1;
+					else if (!strncmp(pentry->name,
+							"synaptics_3202", 14))
+						/* active low */
+						irq_attr.polarity = 1;
+					else if (irq == 41)
+						/* fast_int_1 */
+						irq_attr.polarity = 1;
+					else
+						/* active high */
+						irq_attr.polarity = 0;
+				} else {
+					/* PNW and CLV go with active low */
+					irq_attr.polarity = 1;
+				}
+				io_apic_set_pci_routing(NULL, irq, &irq_attr);
+			}
+		} else {
 			irq = 0; /* No irq */
+		}
 
 		dev = get_device_id(pentry->type, pentry->name);
 
-- 
cgit v1.2.3


From 4cb9b00f42e07830310319a07e6c91413ee8153e Mon Sep 17 00:00:00 2001
From: David Cohen <david.a.cohen@linux.intel.com>
Date: Mon, 16 Dec 2013 17:37:26 -0800
Subject: x86, intel-mid: Remove deprecated X86_MDFLD and X86_WANT_INTEL_MID
 configs

We want to support all Intel MID (Mobile Internet Device) platforms
with a single config selection. This patch removes deprecated
CONFIG_X86_MDFLD and X86_WANT_INTEL_MID options in favor of having
CONFIG_X86_INTEL_MID only.

Signed-off-by: David Cohen <david.a.cohen@linux.intel.com>
Link: http://lkml.kernel.org/r/1387244246-20714-1-git-send-email-david.a.cohen@linux.intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/Kconfig | 30 +++++++-----------------------
 1 file changed, 7 insertions(+), 23 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0952ecd60eca..edcf80eda566 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -438,42 +438,26 @@ config X86_INTEL_CE
 	  This option compiles in support for the CE4100 SOC for settop
 	  boxes and media devices.
 
-config X86_WANT_INTEL_MID
+config X86_INTEL_MID
 	bool "Intel MID platform support"
 	depends on X86_32
 	depends on X86_EXTENDED_PLATFORM
-	---help---
-	  Select to build a kernel capable of supporting Intel MID platform
-	  systems which do not have the PCI legacy interfaces (Moorestown,
-	  Medfield). If you are building for a PC class system say N here.
-
-if X86_WANT_INTEL_MID
-
-config X86_INTEL_MID
-	bool
-
-config X86_MDFLD
-       bool "Medfield MID platform"
 	depends on PCI
 	depends on PCI_GOANY
 	depends on X86_IO_APIC
-	select X86_INTEL_MID
 	select SFI
+	select I2C
 	select DW_APB_TIMER
 	select APB_TIMER
-	select I2C
-	select SPI
 	select INTEL_SCU_IPC
-	select X86_PLATFORM_DEVICES
 	select MFD_INTEL_MSIC
 	---help---
-	  Medfield is Intel's Low Power Intel Architecture (LPIA) based Moblin
-	  Internet Device(MID) platform. 
-	  Unlike standard x86 PCs, Medfield does not have many legacy devices
-	  nor standard legacy replacement devices/features. e.g. Medfield does
-	  not contain i8259, i8254, HPET, legacy BIOS, most of the io ports.
+	  Select to build a kernel capable of supporting Intel MID (Mobile
+	  Internet Device) platform systems which do not have the PCI legacy
+	  interfaces. If you are building for a PC class system say N here.
 
-endif
+	  Intel MID platforms are based on an Intel processor and chipset which
+	  consume less power than most of the x86 derivatives.
 
 config X86_INTEL_LPSS
 	bool "Intel Low Power Subsystem Support"
-- 
cgit v1.2.3


From aee636c4809fa54848ff07a899b326eb1f9987a2 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 15 Jan 2014 06:50:07 -0800
Subject: bpf: do not use reciprocal divide

At first Jakub Zawadzki noticed that some divisions by reciprocal_divide
were not correct. (off by one in some cases)
http://www.wireshark.org/~darkjames/reciprocal-buggy.c

He could also show this with BPF:
http://www.wireshark.org/~darkjames/set-and-dump-filter-k-bug.c

The reciprocal divide in linux kernel is not generic enough,
lets remove its use in BPF, as it is not worth the pain with
current cpus.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Jakub Zawadzki <darkjames-ws@darkjames.pl>
Cc: Mircea Gherzan <mgherzan@gmail.com>
Cc: Daniel Borkmann <dxchgb@gmail.com>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Cc: Matt Evans <matt@ozlabs.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm/net/bpf_jit_32.c       |  6 +++---
 arch/powerpc/net/bpf_jit_comp.c |  7 ++++---
 arch/s390/net/bpf_jit_comp.c    | 17 ++++++++++++-----
 arch/sparc/net/bpf_jit_comp.c   | 17 ++++++++++++++---
 arch/x86/net/bpf_jit_comp.c     | 14 ++++++++++----
 net/core/filter.c               | 30 ++----------------------------
 6 files changed, 45 insertions(+), 46 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 9ed155ad0f97..271b5e971568 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -641,10 +641,10 @@ load_ind:
 			emit(ARM_MUL(r_A, r_A, r_X), ctx);
 			break;
 		case BPF_S_ALU_DIV_K:
-			/* current k == reciprocal_value(userspace k) */
+			if (k == 1)
+				break;
 			emit_mov_i(r_scratch, k, ctx);
-			/* A = top 32 bits of the product */
-			emit(ARM_UMULL(r_scratch, r_A, r_A, r_scratch), ctx);
+			emit_udiv(r_A, r_A, r_scratch, ctx);
 			break;
 		case BPF_S_ALU_DIV_X:
 			update_on_xread(ctx);
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index ac3c2a10dafd..555034f8505e 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -223,10 +223,11 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 			}
 			PPC_DIVWU(r_A, r_A, r_X);
 			break;
-		case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K); */
+		case BPF_S_ALU_DIV_K: /* A /= K */
+			if (K == 1)
+				break;
 			PPC_LI32(r_scratch1, K);
-			/* Top 32 bits of 64bit result -> A */
-			PPC_MULHWU(r_A, r_A, r_scratch1);
+			PPC_DIVWU(r_A, r_A, r_scratch1);
 			break;
 		case BPF_S_ALU_AND_X:
 			ctx->seen |= SEEN_XREG;
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 16871da37371..fc0fa77728e1 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -371,11 +371,13 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
 		/* dr %r4,%r12 */
 		EMIT2(0x1d4c);
 		break;
-	case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K) */
-		/* m %r4,<d(K)>(%r13) */
-		EMIT4_DISP(0x5c40d000, EMIT_CONST(K));
-		/* lr %r5,%r4 */
-		EMIT2(0x1854);
+	case BPF_S_ALU_DIV_K: /* A /= K */
+		if (K == 1)
+			break;
+		/* lhi %r4,0 */
+		EMIT4(0xa7480000);
+		/* d %r4,<d(K)>(%r13) */
+		EMIT4_DISP(0x5d40d000, EMIT_CONST(K));
 		break;
 	case BPF_S_ALU_MOD_X: /* A %= X */
 		jit->seen |= SEEN_XREG | SEEN_RET0;
@@ -391,6 +393,11 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
 		EMIT2(0x1854);
 		break;
 	case BPF_S_ALU_MOD_K: /* A %= K */
+		if (K == 1) {
+			/* lhi %r5,0 */
+			EMIT4(0xa7580000);
+			break;
+		}
 		/* lhi %r4,0 */
 		EMIT4(0xa7480000);
 		/* d %r4,<d(K)>(%r13) */
diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c
index 218b6b23c378..01fe9946d388 100644
--- a/arch/sparc/net/bpf_jit_comp.c
+++ b/arch/sparc/net/bpf_jit_comp.c
@@ -497,9 +497,20 @@ void bpf_jit_compile(struct sk_filter *fp)
 			case BPF_S_ALU_MUL_K:	/* A *= K */
 				emit_alu_K(MUL, K);
 				break;
-			case BPF_S_ALU_DIV_K:	/* A /= K */
-				emit_alu_K(MUL, K);
-				emit_read_y(r_A);
+			case BPF_S_ALU_DIV_K:	/* A /= K with K != 0*/
+				if (K == 1)
+					break;
+				emit_write_y(G0);
+#ifdef CONFIG_SPARC32
+				/* The Sparc v8 architecture requires
+				 * three instructions between a %y
+				 * register write and the first use.
+				 */
+				emit_nop();
+				emit_nop();
+				emit_nop();
+#endif
+				emit_alu_K(DIV, K);
 				break;
 			case BPF_S_ALU_DIV_X:	/* A /= X; */
 				emit_cmpi(r_X, 0);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 26328e800869..4ed75dd81d05 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -359,15 +359,21 @@ void bpf_jit_compile(struct sk_filter *fp)
 				EMIT2(0x89, 0xd0);	/* mov %edx,%eax */
 				break;
 			case BPF_S_ALU_MOD_K: /* A %= K; */
+				if (K == 1) {
+					CLEAR_A();
+					break;
+				}
 				EMIT2(0x31, 0xd2);	/* xor %edx,%edx */
 				EMIT1(0xb9);EMIT(K, 4);	/* mov imm32,%ecx */
 				EMIT2(0xf7, 0xf1);	/* div %ecx */
 				EMIT2(0x89, 0xd0);	/* mov %edx,%eax */
 				break;
-			case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K); */
-				EMIT3(0x48, 0x69, 0xc0); /* imul imm32,%rax,%rax */
-				EMIT(K, 4);
-				EMIT4(0x48, 0xc1, 0xe8, 0x20); /* shr $0x20,%rax */
+			case BPF_S_ALU_DIV_K: /* A /= K */
+				if (K == 1)
+					break;
+				EMIT2(0x31, 0xd2);	/* xor %edx,%edx */
+				EMIT1(0xb9);EMIT(K, 4);	/* mov imm32,%ecx */
+				EMIT2(0xf7, 0xf1);	/* div %ecx */
 				break;
 			case BPF_S_ALU_AND_X:
 				seen |= SEEN_XREG;
diff --git a/net/core/filter.c b/net/core/filter.c
index 01b780856db2..ad30d626a5bd 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -36,7 +36,6 @@
 #include <asm/uaccess.h>
 #include <asm/unaligned.h>
 #include <linux/filter.h>
-#include <linux/reciprocal_div.h>
 #include <linux/ratelimit.h>
 #include <linux/seccomp.h>
 #include <linux/if_vlan.h>
@@ -166,7 +165,7 @@ unsigned int sk_run_filter(const struct sk_buff *skb,
 			A /= X;
 			continue;
 		case BPF_S_ALU_DIV_K:
-			A = reciprocal_divide(A, K);
+			A /= K;
 			continue;
 		case BPF_S_ALU_MOD_X:
 			if (X == 0)
@@ -553,11 +552,6 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
 		/* Some instructions need special checks */
 		switch (code) {
 		case BPF_S_ALU_DIV_K:
-			/* check for division by zero */
-			if (ftest->k == 0)
-				return -EINVAL;
-			ftest->k = reciprocal_value(ftest->k);
-			break;
 		case BPF_S_ALU_MOD_K:
 			/* check for division by zero */
 			if (ftest->k == 0)
@@ -853,27 +847,7 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
 	to->code = decodes[code];
 	to->jt = filt->jt;
 	to->jf = filt->jf;
-
-	if (code == BPF_S_ALU_DIV_K) {
-		/*
-		 * When loaded this rule user gave us X, which was
-		 * translated into R = r(X). Now we calculate the
-		 * RR = r(R) and report it back. If next time this
-		 * value is loaded and RRR = r(RR) is calculated
-		 * then the R == RRR will be true.
-		 *
-		 * One exception. X == 1 translates into R == 0 and
-		 * we can't calculate RR out of it with r().
-		 */
-
-		if (filt->k == 0)
-			to->k = 1;
-		else
-			to->k = reciprocal_value(filt->k);
-
-		BUG_ON(reciprocal_value(to->k) != filt->k);
-	} else
-		to->k = filt->k;
+	to->k = filt->k;
 }
 
 int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len)
-- 
cgit v1.2.3


From da6139e49c7cb0f4251265cb5243b8d220adb48d Mon Sep 17 00:00:00 2001
From: Prarit Bhargava <prarit@redhat.com>
Date: Mon, 13 Jan 2014 06:51:01 -0500
Subject: x86: Add check for number of available vectors before CPU down

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=64791

When a cpu is downed on a system, the irqs on the cpu are assigned to
other cpus.  It is possible, however, that when a cpu is downed there
aren't enough free vectors on the remaining cpus to account for the
vectors from the cpu that is being downed.

This results in an interesting "overflow" condition where irqs are
"assigned" to a CPU but are not handled.

For example, when downing cpus on a 1-64 logical processor system:

<snip>
[  232.021745] smpboot: CPU 61 is now offline
[  238.480275] smpboot: CPU 62 is now offline
[  245.991080] ------------[ cut here ]------------
[  245.996270] WARNING: CPU: 0 PID: 0 at net/sched/sch_generic.c:264 dev_watchdog+0x246/0x250()
[  246.005688] NETDEV WATCHDOG: p786p1 (ixgbe): transmit queue 0 timed out
[  246.013070] Modules linked in: lockd sunrpc iTCO_wdt iTCO_vendor_support sb_edac ixgbe microcode e1000e pcspkr joydev edac_core lpc_ich ioatdma ptp mdio mfd_core i2c_i801 dca pps_core i2c_core wmi acpi_cpufreq isci libsas scsi_transport_sas
[  246.037633] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.12.0+ #14
[  246.044451] Hardware name: Intel Corporation S4600LH ........../SVRBD-ROW_T, BIOS SE5C600.86B.01.08.0003.022620131521 02/26/2013
[  246.057371]  0000000000000009 ffff88081fa03d40 ffffffff8164fbf6 ffff88081fa0ee48
[  246.065728]  ffff88081fa03d90 ffff88081fa03d80 ffffffff81054ecc ffff88081fa13040
[  246.074073]  0000000000000000 ffff88200cce0000 0000000000000040 0000000000000000
[  246.082430] Call Trace:
[  246.085174]  <IRQ>  [<ffffffff8164fbf6>] dump_stack+0x46/0x58
[  246.091633]  [<ffffffff81054ecc>] warn_slowpath_common+0x8c/0xc0
[  246.098352]  [<ffffffff81054fb6>] warn_slowpath_fmt+0x46/0x50
[  246.104786]  [<ffffffff815710d6>] dev_watchdog+0x246/0x250
[  246.110923]  [<ffffffff81570e90>] ? dev_deactivate_queue.constprop.31+0x80/0x80
[  246.119097]  [<ffffffff8106092a>] call_timer_fn+0x3a/0x110
[  246.125224]  [<ffffffff8106280f>] ? update_process_times+0x6f/0x80
[  246.132137]  [<ffffffff81570e90>] ? dev_deactivate_queue.constprop.31+0x80/0x80
[  246.140308]  [<ffffffff81061db0>] run_timer_softirq+0x1f0/0x2a0
[  246.146933]  [<ffffffff81059a80>] __do_softirq+0xe0/0x220
[  246.152976]  [<ffffffff8165fedc>] call_softirq+0x1c/0x30
[  246.158920]  [<ffffffff810045f5>] do_softirq+0x55/0x90
[  246.164670]  [<ffffffff81059d35>] irq_exit+0xa5/0xb0
[  246.170227]  [<ffffffff8166062a>] smp_apic_timer_interrupt+0x4a/0x60
[  246.177324]  [<ffffffff8165f40a>] apic_timer_interrupt+0x6a/0x70
[  246.184041]  <EOI>  [<ffffffff81505a1b>] ? cpuidle_enter_state+0x5b/0xe0
[  246.191559]  [<ffffffff81505a17>] ? cpuidle_enter_state+0x57/0xe0
[  246.198374]  [<ffffffff81505b5d>] cpuidle_idle_call+0xbd/0x200
[  246.204900]  [<ffffffff8100b7ae>] arch_cpu_idle+0xe/0x30
[  246.210846]  [<ffffffff810a47b0>] cpu_startup_entry+0xd0/0x250
[  246.217371]  [<ffffffff81646b47>] rest_init+0x77/0x80
[  246.223028]  [<ffffffff81d09e8e>] start_kernel+0x3ee/0x3fb
[  246.229165]  [<ffffffff81d0989f>] ? repair_env_string+0x5e/0x5e
[  246.235787]  [<ffffffff81d095a5>] x86_64_start_reservations+0x2a/0x2c
[  246.242990]  [<ffffffff81d0969f>] x86_64_start_kernel+0xf8/0xfc
[  246.249610] ---[ end trace fb74fdef54d79039 ]---
[  246.254807] ixgbe 0000:c2:00.0 p786p1: initiating reset due to tx timeout
[  246.262489] ixgbe 0000:c2:00.0 p786p1: Reset adapter
Last login: Mon Nov 11 08:35:14 from 10.18.17.119
[root@(none) ~]# [  246.792676] ixgbe 0000:c2:00.0 p786p1: detected SFP+: 5
[  249.231598] ixgbe 0000:c2:00.0 p786p1: NIC Link is Up 10 Gbps, Flow Control: RX/TX
[  246.792676] ixgbe 0000:c2:00.0 p786p1: detected SFP+: 5
[  249.231598] ixgbe 0000:c2:00.0 p786p1: NIC Link is Up 10 Gbps, Flow Control: RX/TX

(last lines keep repeating.  ixgbe driver is dead until module reload.)

If the downed cpu has more vectors than are free on the remaining cpus on the
system, it is possible that some vectors are "orphaned" even though they are
assigned to a cpu.  In this case, since the ixgbe driver had a watchdog, the
watchdog fired and notified that something was wrong.

This patch adds a function, check_vectors(), to compare the number of vectors
on the CPU going down and compares it to the number of vectors available on
the system.  If there aren't enough vectors for the CPU to go down, an
error is returned and propogated back to userspace.

v2: Do not need to look at percpu irqs
v3: Need to check affinity to prevent counting of MSIs in IOAPIC Lowest
    Priority Mode
v4: Additional changes suggested by Gong Chen.
v5/v6/v7/v8: Updated comment text

Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Link: http://lkml.kernel.org/r/1389613861-3853-1-git-send-email-prarit@redhat.com
Reviewed-by: Gong Chen <gong.chen@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Seiji Aguchi <seiji.aguchi@hds.com>
Cc: Yang Zhang <yang.z.zhang@Intel.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Janet Morgan <janet.morgan@intel.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Ruiv Wang <ruiv.wang@gmail.com>
Cc: Gong Chen <gong.chen@linux.intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: <stable@vger.kernel.org>
---
 arch/x86/include/asm/irq.h |  1 +
 arch/x86/kernel/irq.c      | 70 ++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/smpboot.c  |  6 ++++
 3 files changed, 77 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 0ea10f27d613..cb6cfcd034cf 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -25,6 +25,7 @@ extern void irq_ctx_init(int cpu);
 
 #ifdef CONFIG_HOTPLUG_CPU
 #include <linux/cpumask.h>
+extern int check_irq_vectors_for_cpu_disable(void);
 extern void fixup_irqs(void);
 extern void irq_force_complete_move(int);
 #endif
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 22d0687e7fda..4207e8d1a094 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -262,6 +262,76 @@ __visible void smp_trace_x86_platform_ipi(struct pt_regs *regs)
 EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
 
 #ifdef CONFIG_HOTPLUG_CPU
+/*
+ * This cpu is going to be removed and its vectors migrated to the remaining
+ * online cpus.  Check to see if there are enough vectors in the remaining cpus.
+ * This function is protected by stop_machine().
+ */
+int check_irq_vectors_for_cpu_disable(void)
+{
+	int irq, cpu;
+	unsigned int this_cpu, vector, this_count, count;
+	struct irq_desc *desc;
+	struct irq_data *data;
+	struct cpumask affinity_new, online_new;
+
+	this_cpu = smp_processor_id();
+	cpumask_copy(&online_new, cpu_online_mask);
+	cpu_clear(this_cpu, online_new);
+
+	this_count = 0;
+	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
+		irq = __this_cpu_read(vector_irq[vector]);
+		if (irq >= 0) {
+			desc = irq_to_desc(irq);
+			data = irq_desc_get_irq_data(desc);
+			cpumask_copy(&affinity_new, data->affinity);
+			cpu_clear(this_cpu, affinity_new);
+
+			/* Do not count inactive or per-cpu irqs. */
+			if (!irq_has_action(irq) || irqd_is_per_cpu(data))
+				continue;
+
+			/*
+			 * A single irq may be mapped to multiple
+			 * cpu's vector_irq[] (for example IOAPIC cluster
+			 * mode).  In this case we have two
+			 * possibilities:
+			 *
+			 * 1) the resulting affinity mask is empty; that is
+			 * this the down'd cpu is the last cpu in the irq's
+			 * affinity mask, or
+			 *
+			 * 2) the resulting affinity mask is no longer
+			 * a subset of the online cpus but the affinity
+			 * mask is not zero; that is the down'd cpu is the
+			 * last online cpu in a user set affinity mask.
+			 */
+			if (cpumask_empty(&affinity_new) ||
+			    !cpumask_subset(&affinity_new, &online_new))
+				this_count++;
+		}
+	}
+
+	count = 0;
+	for_each_online_cpu(cpu) {
+		if (cpu == this_cpu)
+			continue;
+		for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
+		     vector++) {
+			if (per_cpu(vector_irq, cpu)[vector] < 0)
+				count++;
+		}
+	}
+
+	if (count < this_count) {
+		pr_warn("CPU %d disable failed: CPU has %u vectors assigned and there are only %u available.\n",
+			this_cpu, this_count, count);
+		return -ERANGE;
+	}
+	return 0;
+}
+
 /* A cpu has been removed from cpu_online_mask.  Reset irq affinities. */
 void fixup_irqs(void)
 {
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 85dc05a3aa02..391ea529dc26 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1312,6 +1312,12 @@ void cpu_disable_common(void)
 
 int native_cpu_disable(void)
 {
+	int ret;
+
+	ret = check_irq_vectors_for_cpu_disable();
+	if (ret)
+		return ret;
+
 	clear_local_APIC();
 
 	cpu_disable_common();
-- 
cgit v1.2.3


From 7da7c1561366ba8adb7275464ab44e84e1faa7e0 Mon Sep 17 00:00:00 2001
From: Bin Gao <bin.gao@intel.com>
Date: Mon, 21 Oct 2013 09:16:33 -0700
Subject: x86, tsc: Add static (MSR) TSC calibration on Intel Atom SoCs

On SoCs that have the calibration MSRs available, either there is no
PIT, HPET or PMTIMER to calibrate against, or the PIT/HPET/PMTIMER is
driven from the same clock as the TSC, so calibration is redundant and
just slows down the boot.

TSC rate is caculated by this formula:
<maximum core-clock to bus-clock ratio> * <maximum resolved frequency>
The ratio and the resolved frequency ID can be obtained from MSR.
See Intel 64 and IA-32 System Programming Guid section 16.12 and 30.11.5
for details.

Signed-off-by: Bin Gao <bin.gao@intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Link: http://lkml.kernel.org/n/tip-rgm7xmg7k6qnjlw3ynkcjsmh@git.kernel.org
---
 arch/x86/include/asm/tsc.h |   3 ++
 arch/x86/kernel/Makefile   |   2 +-
 arch/x86/kernel/tsc.c      |  10 ++++
 arch/x86/kernel/tsc_msr.c  | 125 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 139 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kernel/tsc_msr.c

(limited to 'arch')

diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 235be70d5bb4..57ae63cd6ee2 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -65,4 +65,7 @@ extern int notsc_setup(char *);
 extern void tsc_save_sched_clock_state(void);
 extern void tsc_restore_sched_clock_state(void);
 
+/* MSR based TSC calibration for Intel Atom SoC platforms */
+int try_msr_calibrate_tsc(unsigned long *fast_calibrate);
+
 #endif /* _ASM_X86_TSC_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index dbe9bd65ab7b..6dbbb1e05d64 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -32,7 +32,7 @@ obj-$(CONFIG_X86_64)	+= vsyscall_emu_64.o
 obj-y			+= bootflag.o e820.o
 obj-y			+= pci-dma.o quirks.o topology.o kdebugfs.o
 obj-y			+= alternative.o i8253.o pci-nommu.o hw_breakpoint.o
-obj-y			+= tsc.o io_delay.o rtc.o
+obj-y			+= tsc.o tsc_msr.o io_delay.o rtc.o
 obj-y			+= pci-iommu_table.o
 obj-y			+= resource.o
 
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 930e5d48f560..e5747167da83 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -419,6 +419,16 @@ unsigned long native_calibrate_tsc(void)
 	unsigned long flags, latch, ms, fast_calibrate;
 	int hpet = is_hpet_enabled(), i, loopmin;
 
+	/* Calibrate TSC using MSR for Intel Atom SoCs */
+	local_irq_save(flags);
+	i = try_msr_calibrate_tsc(&fast_calibrate);
+	local_irq_restore(flags);
+	if (i >= 0) {
+		if (i == 0)
+			pr_warn("Fast TSC calibration using MSR failed\n");
+		return fast_calibrate;
+	}
+
 	local_irq_save(flags);
 	fast_calibrate = quick_pit_calibrate();
 	local_irq_restore(flags);
diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
new file mode 100644
index 000000000000..c5027724e307
--- /dev/null
+++ b/arch/x86/kernel/tsc_msr.c
@@ -0,0 +1,125 @@
+/*
+ * tsc_msr.c - MSR based TSC calibration on Intel Atom SoC platforms.
+ *
+ * TSC in Intel Atom SoC runs at a constant rate which can be figured
+ * by this formula:
+ * <maximum core-clock to bus-clock ratio> * <maximum resolved frequency>
+ * See Intel 64 and IA-32 System Programming Guid section 16.12 and 30.11.5
+ * for details.
+ * Especially some Intel Atom SoCs don't have PIT(i8254) or HPET, so MSR
+ * based calibration is the only option.
+ *
+ *
+ * Copyright (C) 2013 Intel Corporation
+ * Author: Bin Gao <bin.gao@intel.com>
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/kernel.h>
+#include <asm/processor.h>
+#include <asm/setup.h>
+#include <asm/apic.h>
+#include <asm/param.h>
+
+/* CPU reference clock frequency: in KHz */
+#define FREQ_83		83200
+#define FREQ_100	99840
+#define FREQ_133	133200
+#define FREQ_166	166400
+
+#define MAX_NUM_FREQS	8
+
+/*
+ * According to Intel 64 and IA-32 System Programming Guide,
+ * if MSR_PERF_STAT[31] is set, the maximum resolved bus ratio can be
+ * read in MSR_PLATFORM_ID[12:8], otherwise in MSR_PERF_STAT[44:40].
+ * Unfortunately some Intel Atom SoCs aren't quite compliant to this,
+ * so we need manually differentiate SoC families. This is what the
+ * field msr_plat does.
+ */
+struct freq_desc {
+	u8 x86_family;	/* CPU family */
+	u8 x86_model;	/* model */
+	u8 msr_plat;	/* 1: use MSR_PLATFORM_INFO, 0: MSR_IA32_PERF_STATUS */
+	u32 freqs[MAX_NUM_FREQS];
+};
+
+static struct freq_desc freq_desc_tables[] = {
+	/* PNW */
+	{ 6, 0x27, 0, { 0, 0, 0, 0, 0, FREQ_100, 0, FREQ_83 } },
+	/* CLV+ */
+	{ 6, 0x35, 0, { 0, FREQ_133, 0, 0, 0, FREQ_100, 0, FREQ_83 } },
+	/* TNG */
+	{ 6, 0x4a, 1, { 0, FREQ_100, FREQ_133, 0, 0, 0, 0, 0 } },
+	/* VLV2 */
+	{ 6, 0x37, 1, { 0, FREQ_100, FREQ_133, FREQ_166, 0, 0, 0, 0 } },
+	/* ANN */
+	{ 6, 0x5a, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_100, 0, 0, 0, 0 } },
+};
+
+static int match_cpu(u8 family, u8 model)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(freq_desc_tables); i++) {
+		if ((family == freq_desc_tables[i].x86_family) &&
+			(model == freq_desc_tables[i].x86_model))
+			return i;
+	}
+
+	return -1;
+}
+
+/* Map CPU reference clock freq ID(0-7) to CPU reference clock freq(KHz) */
+#define id_to_freq(cpu_index, freq_id) \
+	(freq_desc_tables[cpu_index].freqs[freq_id])
+
+/*
+ * Do MSR calibration only for known/supported CPUs.
+ * Return values:
+ * -1: CPU is unknown/unsupported for MSR based calibration
+ *  0: CPU is known/supported, but calibration failed
+ *  1: CPU is known/supported, and calibration succeeded
+ */
+int try_msr_calibrate_tsc(unsigned long *fast_calibrate)
+{
+	int cpu_index;
+	u32 lo, hi, ratio, freq_id, freq;
+
+	cpu_index = match_cpu(boot_cpu_data.x86, boot_cpu_data.x86_model);
+	if (cpu_index < 0)
+		return -1;
+
+	*fast_calibrate = 0;
+
+	if (freq_desc_tables[cpu_index].msr_plat) {
+		rdmsr(MSR_PLATFORM_INFO, lo, hi);
+		ratio = (lo >> 8) & 0x1f;
+	} else {
+		rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+		ratio = (hi >> 8) & 0x1f;
+	}
+	pr_info("Maximum core-clock to bus-clock ratio: 0x%x\n", ratio);
+
+	if (!ratio)
+		return 0;
+
+	/* Get FSB FREQ ID */
+	rdmsr(MSR_FSB_FREQ, lo, hi);
+	freq_id = lo & 0x7;
+	freq = id_to_freq(cpu_index, freq_id);
+	pr_info("Resolved frequency ID: %u, frequency: %u KHz\n",
+				freq_id, freq);
+	if (!freq)
+		return 0;
+
+	/* TSC frequency = maximum resolved freq * maximum resolved bus ratio */
+	*fast_calibrate = freq * ratio;
+	pr_info("TSC runs at %lu KHz\n", *fast_calibrate);
+
+	lapic_timer_frequency = (freq * 1000) / HZ;
+	pr_info("lapic_timer_frequency = %d\n", lapic_timer_frequency);
+
+	return 1;
+}
-- 
cgit v1.2.3


From c026b3591e4f2a4993df773183704bb31634e0bd Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 10 Jan 2014 21:06:03 +0100
Subject: x86, mm, perf: Allow recursive faults from interrupts

Waiman managed to trigger a PMI while in a emulate_vsyscall() fault,
the PMI in turn managed to trigger a fault while obtaining a stack
trace. This triggered the sig_on_uaccess_error recursive fault logic
and killed the process dead.

Fix this by explicitly excluding interrupts from the recursive fault
logic.

Reported-and-Tested-by: Waiman Long <waiman.long@hp.com>
Fixes: e00b12e64be9 ("perf/x86: Further optimize copy_from_user_nmi()")
Cc: Aswin Chandramouleeswaran <aswin@hp.com>
Cc: Scott J Norton <scott.norton@hp.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20140110200603.GJ7572@laptop.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/mm/fault.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 9ff85bb8dd69..9d591c895803 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -641,6 +641,20 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 
 	/* Are we prepared to handle this kernel fault? */
 	if (fixup_exception(regs)) {
+		/*
+		 * Any interrupt that takes a fault gets the fixup. This makes
+		 * the below recursive fault logic only apply to a faults from
+		 * task context.
+		 */
+		if (in_interrupt())
+			return;
+
+		/*
+		 * Per the above we're !in_interrupt(), aka. task context.
+		 *
+		 * In this case we need to make sure we're not recursively
+		 * faulting through the emulate_vsyscall() logic.
+		 */
 		if (current_thread_info()->sig_on_uaccess_error && signal) {
 			tsk->thread.trap_nr = X86_TRAP_PF;
 			tsk->thread.error_code = error_code | PF_USER;
@@ -649,6 +663,10 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 			/* XXX: hwpoison faults will set the wrong code. */
 			force_sig_info_fault(signal, si_code, address, tsk, 0);
 		}
+
+		/*
+		 * Barring that, we can do the fixup and be happy.
+		 */
 		return;
 	}
 
-- 
cgit v1.2.3


From bee09ed91cacdbffdbcd3b05de8409c77ec9fcd6 Mon Sep 17 00:00:00 2001
From: Robert Richter <rric@kernel.org>
Date: Wed, 15 Jan 2014 15:57:29 +0100
Subject: perf/x86/amd/ibs: Fix waking up from S3 for AMD family 10h

On AMD family 10h we see following error messages while waking up from
S3 for all non-boot CPUs leading to a failed IBS initialization:

 Enabling non-boot CPUs ...
 smpboot: Booting Node 0 Processor 1 APIC 0x1
 [Firmware Bug]: cpu 1, try to use APIC500 (LVT offset 0) for vector 0x400, but the register is already in use for vector 0xf9 on another cpu
 perf: IBS APIC setup failed on cpu #1
 process: Switch to broadcast mode on CPU1
 CPU1 is up
 ...
 ACPI: Waking up from system sleep state S3

Reason for this is that during suspend the LVT offset for the IBS
vector gets lost and needs to be reinialized while resuming.

The offset is read from the IBSCTL msr. On family 10h the offset needs
to be 1 as offset 0 is used for the MCE threshold interrupt, but
firmware assings it for IBS to 0 too. The kernel needs to reprogram
the vector. The msr is a readonly node msr, but a new value can be
written via pci config space access. The reinitialization is
implemented for family 10h in setup_ibs_ctl() which is forced during
IBS setup.

This patch fixes IBS setup after waking up from S3 by adding
resume/supend hooks for the boot cpu which does the offset
reinitialization.

Marking it as stable to let distros pick up this fix.

Signed-off-by: Robert Richter <rric@kernel.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: <stable@vger.kernel.org> v3.2..
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/1389797849-5565-1-git-send-email-rric.net@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_amd_ibs.c | 53 +++++++++++++++++++++++++++-----
 1 file changed, 45 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index e09f0bfb7b8f..4b8e4d3cd6ea 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/ptrace.h>
+#include <linux/syscore_ops.h>
 
 #include <asm/apic.h>
 
@@ -816,6 +817,18 @@ out:
 	return ret;
 }
 
+static void ibs_eilvt_setup(void)
+{
+	/*
+	 * Force LVT offset assignment for family 10h: The offsets are
+	 * not assigned by the BIOS for this family, so the OS is
+	 * responsible for doing it. If the OS assignment fails, fall
+	 * back to BIOS settings and try to setup this.
+	 */
+	if (boot_cpu_data.x86 == 0x10)
+		force_ibs_eilvt_setup();
+}
+
 static inline int get_ibs_lvt_offset(void)
 {
 	u64 val;
@@ -851,6 +864,36 @@ static void clear_APIC_ibs(void *dummy)
 		setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
 }
 
+#ifdef CONFIG_PM
+
+static int perf_ibs_suspend(void)
+{
+	clear_APIC_ibs(NULL);
+	return 0;
+}
+
+static void perf_ibs_resume(void)
+{
+	ibs_eilvt_setup();
+	setup_APIC_ibs(NULL);
+}
+
+static struct syscore_ops perf_ibs_syscore_ops = {
+	.resume		= perf_ibs_resume,
+	.suspend	= perf_ibs_suspend,
+};
+
+static void perf_ibs_pm_init(void)
+{
+	register_syscore_ops(&perf_ibs_syscore_ops);
+}
+
+#else
+
+static inline void perf_ibs_pm_init(void) { }
+
+#endif
+
 static int
 perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
 {
@@ -877,18 +920,12 @@ static __init int amd_ibs_init(void)
 	if (!caps)
 		return -ENODEV;	/* ibs not supported by the cpu */
 
-	/*
-	 * Force LVT offset assignment for family 10h: The offsets are
-	 * not assigned by the BIOS for this family, so the OS is
-	 * responsible for doing it. If the OS assignment fails, fall
-	 * back to BIOS settings and try to setup this.
-	 */
-	if (boot_cpu_data.x86 == 0x10)
-		force_ibs_eilvt_setup();
+	ibs_eilvt_setup();
 
 	if (!ibs_eilvt_valid())
 		goto out;
 
+	perf_ibs_pm_init();
 	get_online_cpus();
 	ibs_caps = caps;
 	/* make ibs_caps visible to other cpus: */
-- 
cgit v1.2.3


From 54b278b50b8566c227437d6198412cb5a6619e6b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Sun, 12 Jan 2014 22:24:56 +0100
Subject: m68k: Fix build warning in mac_via.h

Fengguang Wu's kbuild test robot reported the following new m68k warnings:

     In file included from drivers/nubus/nubus.c:22:0:
  >> arch/m68k/include/asm/mac_via.h:262:47: warning: 'struct irq_desc' declared inside parameter list [enabled by default]
  >> arch/m68k/include/asm/mac_via.h:262:47: warning: its scope is only this definition or declaration, which is probably not what you want [enabled by default]

Caused by the reworking of the generic local_bh{dis,en}able() code.

To fix it, forward declare 'struct irq_desc'.

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Fixes: c795eb55e740 ("sched/preempt, locking: Rework local_bh_{dis,en}able()")
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: geert@linux-m68k.org
Link: http://lkml.kernel.org/r/20140112212456.GQ7572@laptop.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/m68k/include/asm/mac_via.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/m68k/include/asm/mac_via.h b/arch/m68k/include/asm/mac_via.h
index aeeedf8b2d25..fe3fc9ae1b69 100644
--- a/arch/m68k/include/asm/mac_via.h
+++ b/arch/m68k/include/asm/mac_via.h
@@ -254,6 +254,8 @@
 extern volatile __u8 *via1,*via2;
 extern int rbv_present,via_alt_mapping;
 
+struct irq_desc;
+
 extern void via_register_interrupts(void);
 extern void via_irq_enable(int);
 extern void via_irq_disable(int);
-- 
cgit v1.2.3


From b4a960159e6f5254ac3c95dd183789f402431977 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Fri, 13 Dec 2013 12:53:42 +0100
Subject: s390: Fix misspellings using 'codespell' tool

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/qdio.h          | 2 +-
 arch/s390/kernel/entry64.S            | 2 +-
 arch/s390/kernel/setup.c              | 2 +-
 arch/s390/kvm/priv.c                  | 2 +-
 arch/s390/lib/uaccess_pt.c            | 4 ++--
 arch/s390/mm/pgtable.c                | 4 ++--
 drivers/s390/block/dasd.c             | 2 +-
 drivers/s390/crypto/zcrypt_msgtype6.c | 2 +-
 8 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 0a1abf1e69af..d786c634e052 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -336,7 +336,7 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int,
 #define QDIO_FLAG_CLEANUP_USING_HALT		0x02
 
 /**
- * struct qdio_initialize - qdio initalization data
+ * struct qdio_initialize - qdio initialization data
  * @cdev: associated ccw device
  * @q_format: queue format
  * @adapter_name: name for the adapter
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 9532fe23be47..384e609b4711 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -975,7 +975,7 @@ sie_done:
 	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
 # some program checks are suppressing. C code (e.g. do_protection_exception)
 # will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
-# instructions beween sie64a and sie_done should not cause program
+# instructions between sie64a and sie_done should not cause program
 # interrupts. So lets use a nop (47 00 00 00) as a landing pad.
 # See also HANDLE_SIE_INTERCEPT
 rewind_pad:
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 4444875266ee..36e81d775031 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -373,7 +373,7 @@ static void __init setup_lowcore(void)
 
 	/*
 	 * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
-	 * restart data to the absolute zero lowcore. This is necesary if
+	 * restart data to the absolute zero lowcore. This is necessary if
 	 * PSW restart is done on an offline CPU that has lowcore zero.
 	 */
 	lc->restart_stack = (unsigned long) restart_stack;
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 2440602e6df1..d101dae62771 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -275,7 +275,7 @@ static int handle_io_inst(struct kvm_vcpu *vcpu)
 		return -EOPNOTSUPP;
 	} else {
 		/*
-		 * Set condition code 3 to stop the guest from issueing channel
+		 * Set condition code 3 to stop the guest from issuing channel
 		 * I/O instructions.
 		 */
 		kvm_s390_set_psw_cc(vcpu, 3);
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
index dbdab3e7a1a6..0632dc50da78 100644
--- a/arch/s390/lib/uaccess_pt.c
+++ b/arch/s390/lib/uaccess_pt.c
@@ -74,8 +74,8 @@ static size_t copy_in_kernel(size_t count, void __user *to,
 
 /*
  * Returns kernel address for user virtual address. If the returned address is
- * >= -4095 (IS_ERR_VALUE(x) returns true), a fault has occured and the address
- * contains the (negative) exception code.
+ * >= -4095 (IS_ERR_VALUE(x) returns true), a fault has occurred and the
+ * address contains the (negative) exception code.
  */
 #ifdef CONFIG_64BIT
 
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index e794c88f699a..3584ed9b20a1 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -293,7 +293,7 @@ static int gmap_alloc_table(struct gmap *gmap,
  * @addr: address in the guest address space
  * @len: length of the memory area to unmap
  *
- * Returns 0 if the unmap succeded, -EINVAL if not.
+ * Returns 0 if the unmap succeeded, -EINVAL if not.
  */
 int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
 {
@@ -344,7 +344,7 @@ EXPORT_SYMBOL_GPL(gmap_unmap_segment);
  * @from: source address in the parent address space
  * @to: target address in the guest address space
  *
- * Returns 0 if the mmap succeded, -EINVAL or -ENOMEM if not.
+ * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
  */
 int gmap_map_segment(struct gmap *gmap, unsigned long from,
 		     unsigned long to, unsigned long len)
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index f302efa937ef..1eef0f586950 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -3386,7 +3386,7 @@ int dasd_generic_set_offline(struct ccw_device *cdev)
 
 	if (test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) {
 		/*
-		 * safe offline allready running
+		 * safe offline already running
 		 * could only be called by normal offline so safe_offline flag
 		 * needs to be removed to run normal offline and kill all I/O
 		 */
diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c
index 57bfda1bd71a..dc542e0a3055 100644
--- a/drivers/s390/crypto/zcrypt_msgtype6.c
+++ b/drivers/s390/crypto/zcrypt_msgtype6.c
@@ -419,7 +419,7 @@ static int xcrb_msg_to_type6_ep11cprb_msgx(struct zcrypt_device *zdev,
 	 replaced by the usage domain for non-management commands only.
 	 Therefore we check the first bit of the 'flags' parameter for
 	 management command indication.
-	   0 - non managment command
+	   0 - non management command
 	   1 - management command
 	*/
 	if (!((msg->cprbx.flags & 0x80) == 0x80)) {
-- 
cgit v1.2.3


From f85168e4d96b31b09ecf09a679820b031224e69e Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Wed, 8 Jan 2014 16:45:39 +0100
Subject: s390/cpum_sf: fix printk format warnings

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/perf_event.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index a76d602f5928..5d2dfa31c4ef 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -142,17 +142,17 @@ static void print_debug_sf(void)
 	if (qsi(&si))
 		return;
 
-	pr_info("CPU[%i] CPUM_SF: basic=%i diag=%i min=%i max=%i cpu_speed=%i\n",
+	pr_info("CPU[%i] CPUM_SF: basic=%i diag=%i min=%lu max=%lu cpu_speed=%u\n",
 		cpu, si.as, si.ad, si.min_sampl_rate, si.max_sampl_rate,
 		si.cpu_speed);
 
 	if (si.as)
 		pr_info("CPU[%i] CPUM_SF: Basic-sampling: a=%i e=%i c=%i"
-			" bsdes=%i tear=%p dear=%p\n", cpu,
+			" bsdes=%i tear=%016lx dear=%016lx\n", cpu,
 			si.as, si.es, si.cs, si.bsdes, si.tear, si.dear);
 	if (si.ad)
 		pr_info("CPU[%i] CPUM_SF: Diagnostic-sampling: a=%i e=%i c=%i"
-			" dsdes=%i tear=%p dear=%p\n", cpu,
+			" dsdes=%i tear=%016lx dear=%016lx\n", cpu,
 			si.ad, si.ed, si.cd, si.dsdes, si.tear, si.dear);
 }
 
-- 
cgit v1.2.3


From 4ce00dfcf19c473f3dbf23d5b1372639f0c334f6 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 16 Jan 2014 18:32:25 +0000
Subject: Revert "arm64: Fix memory shareability attribute for
 ioremap_wc/cache"

This reverts commit 2f7dc6027522499582a520807cb9ffda589de47e.

The above commit breaks the mapping type for Device memory because
pgprot_default already contains a Normal memory type. pgprot_default is
also not initialised early enough for earlyprintk resulting in an
inconsistent memory mapping with 64K PAGE_SIZE configuration.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Will Deacon <will.deacon@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/io.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 572769727227..4cc813eddacb 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -229,7 +229,7 @@ extern void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot
 extern void __iounmap(volatile void __iomem *addr);
 extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);
 
-#define PROT_DEFAULT		(pgprot_default | PTE_DIRTY)
+#define PROT_DEFAULT		(PTE_TYPE_PAGE | PTE_AF | PTE_DIRTY)
 #define PROT_DEVICE_nGnRE	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE))
 #define PROT_NORMAL_NC		(PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL_NC))
 #define PROT_NORMAL		(PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
-- 
cgit v1.2.3


From ca1e631c3acf80bc5f5934ce9054a9b4880c96e1 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Thu, 16 Jan 2014 13:00:21 -0800
Subject: x86, tsc, apic: Unbreak static (MSR) calibration when
 CONFIG_X86_LOCAL_APIC=n

If we aren't going to use the local APIC anyway, we obviously don't
care about its timer frequency.

Link: http://lkml.kernel.org/r/tip-rgm7xmg7k6qnjlw3ynkcjsmh@git.kernel.org
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Cc: Bin Gao <bin.gao@intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/tsc_msr.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
index c5027724e307..8b5434f4389f 100644
--- a/arch/x86/kernel/tsc_msr.c
+++ b/arch/x86/kernel/tsc_msr.c
@@ -118,8 +118,10 @@ int try_msr_calibrate_tsc(unsigned long *fast_calibrate)
 	*fast_calibrate = freq * ratio;
 	pr_info("TSC runs at %lu KHz\n", *fast_calibrate);
 
+#ifdef CONFIG_X86_LOCAL_APIC
 	lapic_timer_frequency = (freq * 1000) / HZ;
 	pr_info("lapic_timer_frequency = %d\n", lapic_timer_frequency);
+#endif
 
 	return 1;
 }
-- 
cgit v1.2.3


From acb20d7395f75b0fdb5b300f9fb3b9c6054fb04f Mon Sep 17 00:00:00 2001
From: David Cohen <david.a.cohen@linux.intel.com>
Date: Thu, 16 Jan 2014 15:07:02 -0800
Subject: x86, intel-mid: sfi_handle_*_dev() should check for pdata error code

When Intel MID finds a match between SFI table from FW and registered
SFI devices, it will always register a device regardless the platform
code was successful or not.

This patch adds an extra option for platform code to return error code
and abort device registration on SFI table parsing.

This patch does not contain any functional changes for current intel
mid platform code.

Signed-off-by: David Cohen <david.a.cohen@linux.intel.com>
Link: http://lkml.kernel.org/r/1389913624-9149-2-git-send-email-david.a.cohen@linux.intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/platform/intel-mid/sfi.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c
index 80a52288555c..1d5d20b088fa 100644
--- a/arch/x86/platform/intel-mid/sfi.c
+++ b/arch/x86/platform/intel-mid/sfi.c
@@ -337,6 +337,8 @@ static void __init sfi_handle_ipc_dev(struct sfi_device_table_entry *pentry,
 	pr_debug("IPC bus, name = %16.16s, irq = 0x%2x\n",
 		pentry->name, pentry->irq);
 	pdata = intel_mid_sfi_get_pdata(dev, pentry);
+	if (IS_ERR(pdata))
+		return;
 
 	pdev = platform_device_alloc(pentry->name, 0);
 	if (pdev == NULL) {
@@ -370,6 +372,8 @@ static void __init sfi_handle_spi_dev(struct sfi_device_table_entry *pentry,
 		spi_info.chip_select);
 
 	pdata = intel_mid_sfi_get_pdata(dev, &spi_info);
+	if (IS_ERR(pdata))
+		return;
 
 	spi_info.platform_data = pdata;
 	if (dev->delay)
@@ -395,6 +399,8 @@ static void __init sfi_handle_i2c_dev(struct sfi_device_table_entry *pentry,
 		i2c_info.addr);
 	pdata = intel_mid_sfi_get_pdata(dev, &i2c_info);
 	i2c_info.platform_data = pdata;
+	if (IS_ERR(pdata))
+		return;
 
 	if (dev->delay)
 		intel_scu_i2c_device_register(pentry->host_num, &i2c_info);
-- 
cgit v1.2.3


From a957a14bb4ca976cbaaff3594ef5b8f8f7f65804 Mon Sep 17 00:00:00 2001
From: David Cohen <david.a.cohen@linux.intel.com>
Date: Thu, 16 Jan 2014 15:07:03 -0800
Subject: x86, intel-mid: Check get_gpio_by_name() error code on platform code

This patch does cleanup on all intel mid platform code that uses
gpio_get_by_name() function. From now on they should check for any error
code instead of only hardcoded -1.

There are no functional changes from this change.

Signed-off-by: David Cohen <david.a.cohen@linux.intel.com>
Link: http://lkml.kernel.org/r/1389913624-9149-3-git-send-email-david.a.cohen@linux.intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/platform/intel-mid/device_libs/platform_emc1403.c   | 4 +++-
 arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c | 2 +-
 arch/x86/platform/intel-mid/device_libs/platform_lis331.c    | 4 +++-
 arch/x86/platform/intel-mid/device_libs/platform_max7315.c   | 2 +-
 arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c   | 2 +-
 arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c | 2 +-
 arch/x86/platform/intel-mid/device_libs/platform_tca6416.c   | 4 ++--
 7 files changed, 12 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c b/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c
index 0d942c1d26d5..69a783689d21 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c
@@ -22,7 +22,9 @@ static void __init *emc1403_platform_data(void *info)
 	int intr = get_gpio_by_name("thermal_int");
 	int intr2nd = get_gpio_by_name("thermal_alert");
 
-	if (intr == -1 || intr2nd == -1)
+	if (intr < 0)
+		return NULL;
+	if (intr2nd < 0)
 		return NULL;
 
 	i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET;
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c b/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c
index a013a4834bbe..dccae6b0413f 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c
@@ -66,7 +66,7 @@ static int __init pb_keys_init(void)
 		gb[i].gpio = get_gpio_by_name(gb[i].desc);
 		pr_debug("info[%2d]: name = %s, gpio = %d\n", i, gb[i].desc,
 					gb[i].gpio);
-		if (gb[i].gpio == -1)
+		if (gb[i].gpio < 0)
 			continue;
 
 		if (i != good)
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_lis331.c b/arch/x86/platform/intel-mid/device_libs/platform_lis331.c
index 15278c11f714..54226de7541a 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_lis331.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_lis331.c
@@ -21,7 +21,9 @@ static void __init *lis331dl_platform_data(void *info)
 	int intr = get_gpio_by_name("accel_int");
 	int intr2nd = get_gpio_by_name("accel_2");
 
-	if (intr == -1 || intr2nd == -1)
+	if (intr < 0)
+		return NULL;
+	if (intr2nd < 0)
 		return NULL;
 
 	i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET;
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_max7315.c b/arch/x86/platform/intel-mid/device_libs/platform_max7315.c
index 94ade10024ae..2c8acbc1e9ad 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_max7315.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_max7315.c
@@ -48,7 +48,7 @@ static void __init *max7315_platform_data(void *info)
 	gpio_base = get_gpio_by_name(base_pin_name);
 	intr = get_gpio_by_name(intr_pin_name);
 
-	if (gpio_base == -1)
+	if (gpio_base < 0)
 		return NULL;
 	max7315->gpio_base = gpio_base;
 	if (intr != -1) {
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c b/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c
index dd28d63c84fb..cfe9a47a1e87 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c
@@ -19,7 +19,7 @@ static void *mpu3050_platform_data(void *info)
 	struct i2c_board_info *i2c_info = info;
 	int intr = get_gpio_by_name("mpu3050_int");
 
-	if (intr == -1)
+	if (intr < 0)
 		return NULL;
 
 	i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET;
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c b/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c
index d87182a09263..65c2a9a19db4 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c
@@ -26,7 +26,7 @@ static void __init *pmic_gpio_platform_data(void *info)
 	static struct intel_pmic_gpio_platform_data pmic_gpio_pdata;
 	int gpio_base = get_gpio_by_name("pmic_gpio_base");
 
-	if (gpio_base == -1)
+	if (gpio_base < 0)
 		gpio_base = 64;
 	pmic_gpio_pdata.gpio_base = gpio_base;
 	pmic_gpio_pdata.irq_base = gpio_base + INTEL_MID_IRQ_OFFSET;
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c b/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c
index 22881c9a6737..33be0b3be6e1 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c
@@ -34,10 +34,10 @@ static void *tca6416_platform_data(void *info)
 	gpio_base = get_gpio_by_name(base_pin_name);
 	intr = get_gpio_by_name(intr_pin_name);
 
-	if (gpio_base == -1)
+	if (gpio_base < 0)
 		return NULL;
 	tca6416.gpio_base = gpio_base;
-	if (intr != -1) {
+	if (intr >= 0) {
 		i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET;
 		tca6416.irq_base = gpio_base + INTEL_MID_IRQ_OFFSET;
 	} else {
-- 
cgit v1.2.3


From 28c6a39b3367f29462cd586785dc445cd6b5ac23 Mon Sep 17 00:00:00 2001
From: David Cohen <david.a.cohen@linux.intel.com>
Date: Thu, 16 Jan 2014 15:07:04 -0800
Subject: x86, intel-mid: Return proper error code from get_gpio_by_name()

This patch cleans up get_gpio_by_name() to return an error code
instead of hardcoded -1.

Signed-off-by: David Cohen <david.a.cohen@linux.intel.com>
Link: http://lkml.kernel.org/r/1389913624-9149-4-git-send-email-david.a.cohen@linux.intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/platform/intel-mid/sfi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c
index 1d5d20b088fa..438306ebed05 100644
--- a/arch/x86/platform/intel-mid/sfi.c
+++ b/arch/x86/platform/intel-mid/sfi.c
@@ -224,7 +224,7 @@ int get_gpio_by_name(const char *name)
 		if (!strncmp(name, pentry->pin_name, SFI_NAME_LEN))
 			return pentry->pin_no;
 	}
-	return -1;
+	return -EINVAL;
 }
 
 void __init intel_scu_device_register(struct platform_device *pdev)
-- 
cgit v1.2.3


From ee87c751d88f9b03fee7349556817fe80c113b32 Mon Sep 17 00:00:00 2001
From: Fengguang Wu <fengguang.wu@intel.com>
Date: Thu, 16 Jan 2014 16:13:08 -0800
Subject: x86, intel_mid: Replace memcpy with struct assignment

This is a cleanup proposed by coccinelle. It replaces memcpy with struct
assignment on intel-mid's sfi layer.

Generated by: coccinelle/misc/memcpy-assign.cocci

Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
Link: http://lkml.kernel.org/r/1389917588-9785-1-git-send-email-david.a.cohen@linux.intel.com
Signed-off-by: David Cohen <david.a.cohen@linux.intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/platform/intel-mid/sfi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c
index 438306ebed05..994c40bd7cb7 100644
--- a/arch/x86/platform/intel-mid/sfi.c
+++ b/arch/x86/platform/intel-mid/sfi.c
@@ -250,7 +250,7 @@ static void __init intel_scu_spi_device_register(struct spi_board_info *sdev)
 			sdev->modalias);
 		return;
 	}
-	memcpy(new_dev, sdev, sizeof(*sdev));
+	*new_dev = *sdev;
 
 	spi_devs[spi_next_dev++] = new_dev;
 }
@@ -271,7 +271,7 @@ static void __init intel_scu_i2c_device_register(int bus,
 			idev->type);
 		return;
 	}
-	memcpy(new_dev, idev, sizeof(*idev));
+	*new_dev = *idev;
 
 	i2c_bus[i2c_next_dev] = bus;
 	i2c_devs[i2c_next_dev++] = new_dev;
-- 
cgit v1.2.3


From 75b99dbd634f9caf7b4e48ec5c2dcec8c1837372 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 16 Jan 2014 11:15:12 -0800
Subject: parisc: fix SO_MAX_PACING_RATE typo

SO_MAX_PACING_RATE definition on parisc got a typo.
Its not too late to fix it, before 3.13 is official.

Fixes: 62748f32d501 ("net: introduce SO_MAX_PACING_RATE")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/parisc/include/uapi/asm/socket.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index f33113a6141e..70b3674dac4e 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -75,6 +75,6 @@
 
 #define SO_BUSY_POLL		0x4027
 
-#define SO_MAX_PACING_RATE	0x4048
+#define SO_MAX_PACING_RATE	0x4028
 
 #endif /* _UAPI_ASM_SOCKET_H */
-- 
cgit v1.2.3


From 3af57f78c38131b7a66e2b01e06fdacae01992a3 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 17 Jan 2014 09:37:15 +0100
Subject: s390/bpf,jit: fix 32 bit divisions, use unsigned divide instructions

The s390 bpf jit compiler emits the signed divide instructions "dr" and "d"
for unsigned divisions.
This can cause problems: the dividend will be zero extended to a 64 bit value
and the divisor is the 32 bit signed value as specified A or X accumulator,
even though A and X are supposed to be treated as unsigned values.

The divide instrunctions will generate an exception if the result cannot be
expressed with a 32 bit signed value.
This is the case if e.g. the dividend is 0xffffffff and the divisor either 1
or also 0xffffffff (signed: -1).

To avoid all these issues simply use unsigned divide instructions.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/s390/net/bpf_jit_comp.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index fc0fa77728e1..708d60e40066 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -368,16 +368,16 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
 		EMIT4_PCREL(0xa7840000, (jit->ret0_ip - jit->prg));
 		/* lhi %r4,0 */
 		EMIT4(0xa7480000);
-		/* dr %r4,%r12 */
-		EMIT2(0x1d4c);
+		/* dlr %r4,%r12 */
+		EMIT4(0xb997004c);
 		break;
 	case BPF_S_ALU_DIV_K: /* A /= K */
 		if (K == 1)
 			break;
 		/* lhi %r4,0 */
 		EMIT4(0xa7480000);
-		/* d %r4,<d(K)>(%r13) */
-		EMIT4_DISP(0x5d40d000, EMIT_CONST(K));
+		/* dl %r4,<d(K)>(%r13) */
+		EMIT6_DISP(0xe340d000, 0x0097, EMIT_CONST(K));
 		break;
 	case BPF_S_ALU_MOD_X: /* A %= X */
 		jit->seen |= SEEN_XREG | SEEN_RET0;
@@ -387,8 +387,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
 		EMIT4_PCREL(0xa7840000, (jit->ret0_ip - jit->prg));
 		/* lhi %r4,0 */
 		EMIT4(0xa7480000);
-		/* dr %r4,%r12 */
-		EMIT2(0x1d4c);
+		/* dlr %r4,%r12 */
+		EMIT4(0xb997004c);
 		/* lr %r5,%r4 */
 		EMIT2(0x1854);
 		break;
@@ -400,8 +400,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
 		}
 		/* lhi %r4,0 */
 		EMIT4(0xa7480000);
-		/* d %r4,<d(K)>(%r13) */
-		EMIT4_DISP(0x5d40d000, EMIT_CONST(K));
+		/* dl %r4,<d(K)>(%r13) */
+		EMIT6_DISP(0xe340d000, 0x0097, EMIT_CONST(K));
 		/* lr %r5,%r4 */
 		EMIT2(0x1854);
 		break;
-- 
cgit v1.2.3


From 56931d73697c99ecf7aba6ae86c94d3a2d15d596 Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Mon, 13 Jan 2014 00:56:38 +1100
Subject: m68k/mac: Make SCC reset work more reliably

For SCC initialization we cannot assume that the control register is in
the correct state to accept a register pointer. So first read from the
control register in order to "sync" up.

Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/kernel/head.S | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/m68k/kernel/head.S b/arch/m68k/kernel/head.S
index 7d7913f5dce3..4c99bab7e664 100644
--- a/arch/m68k/kernel/head.S
+++ b/arch/m68k/kernel/head.S
@@ -2915,7 +2915,9 @@ func_start	serial_init,%d0/%d1/%a0/%a1
 
 #if defined(MAC_USE_SCC_A) || defined(MAC_USE_SCC_B)
 	movel	%pc@(L(mac_sccbase)),%a0
-	/* Reset SCC device */
+	/* Reset SCC register pointer */
+	moveb	%a0@(mac_scc_cha_a_ctrl_offset),%d0
+	/* Reset SCC device: write register pointer then register value */
 	moveb	#9,%a0@(mac_scc_cha_a_ctrl_offset)
 	moveb	#0xc0,%a0@(mac_scc_cha_a_ctrl_offset)
 	/* Wait for 5 PCLK cycles, which is about 68 CPU cycles */
-- 
cgit v1.2.3