44 files changed, 1709 insertions, 369 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index eb1cf898ed3c..c2fb8a87dccb 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -488,6 +488,22 @@ config X86_INTEL_MID
 	  Intel MID platforms are based on an Intel processor and chipset which
 	  consume less power than most of the x86 derivatives.
 
+config X86_INTEL_QUARK
+	bool "Intel Quark platform support"
+	depends on X86_32
+	depends on X86_EXTENDED_PLATFORM
+	depends on X86_PLATFORM_DEVICES
+	depends on X86_TSC
+	depends on PCI
+	depends on PCI_GOANY
+	depends on X86_IO_APIC
+	select IOSF_MBI
+	select INTEL_IMR
+	---help---
+	  Select to include support for Quark X1000 SoC.
+	  Say Y here if you have a Quark based system such as the Arduino
+	  compatible Intel Galileo.
+
 config X86_INTEL_LPSS
 	bool "Intel Low Power Subsystem Support"
 	depends on ACPI
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 61bd2ad94281..20028da8ae18 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -313,6 +313,19 @@ config DEBUG_NMI_SELFTEST
 
 	  If unsure, say N.
 
+config DEBUG_IMR_SELFTEST
+	bool "Isolated Memory Region self test"
+	default n
+	depends on INTEL_IMR
+	---help---
+	  This option enables automated sanity testing of the IMR code.
+	  Some simple tests are run to verify IMR bounds checking, alignment
+	  and overlapping. This option is really only useful if you are
+	  debugging an IMR memory map or are modifying the IMR code and want to
+	  test your changes.
+
+	  If unsure say N here.
+
 config X86_DEBUG_STATIC_CPU_HAS
 	bool "Debug alternatives"
 	depends on DEBUG_KERNEL
diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um
index 36b62bc52638..95eba554baf9 100644
--- a/arch/x86/Makefile.um
+++ b/arch/x86/Makefile.um
@@ -30,7 +30,7 @@ cflags-y += -ffreestanding
 # Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use
 # a lot more stack due to the lack of sharing of stacklots.  Also, gcc
 # 4.3.0 needs -funit-at-a-time for extern inline functions.
-KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then \
+KBUILD_CFLAGS += $(shell if [ $(cc-version) -lt 0400 ] ; then \
 			echo $(call cc-option,-fno-unit-at-a-time); \
 			else echo $(call cc-option,-funit-at-a-time); fi ;)
 
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 843feb3eb20b..0a291cdfaf77 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -51,6 +51,7 @@ $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
 
 vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \
 	$(objtree)/drivers/firmware/efi/libstub/lib.a
+vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
 
 $(obj)/vmlinux: $(vmlinux-objs-y) FORCE
 	$(call if_changed,ld)
diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index bb1376381985..7083c16cccba 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -14,6 +14,13 @@
 static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
 		LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
 
+struct kaslr_setup_data {
+	__u64 next;
+	__u32 type;
+	__u32 len;
+	__u8 data[1];
+} kaslr_setup_data;
+
 #define I8254_PORT_CONTROL	0x43
 #define I8254_PORT_COUNTER0	0x40
 #define I8254_CMD_READBACK	0xC0
@@ -295,7 +302,29 @@ static unsigned long find_random_addr(unsigned long minimum,
 	return slots_fetch_random();
 }
 
-unsigned char *choose_kernel_location(unsigned char *input,
+static void add_kaslr_setup_data(struct boot_params *params, __u8 enabled)
+{
+	struct setup_data *data;
+
+	kaslr_setup_data.type = SETUP_KASLR;
+	kaslr_setup_data.len = 1;
+	kaslr_setup_data.next = 0;
+	kaslr_setup_data.data[0] = enabled;
+
+	data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
+
+	while (data && data->next)
+		data = (struct setup_data *)(unsigned long)data->next;
+
+	if (data)
+		data->next = (unsigned long)&kaslr_setup_data;
+	else
+		params->hdr.setup_data = (unsigned long)&kaslr_setup_data;
+
+}
+
+unsigned char *choose_kernel_location(struct boot_params *params,
+				      unsigned char *input,
 				      unsigned long input_size,
 				      unsigned char *output,
 				      unsigned long output_size)
@@ -306,14 +335,17 @@ unsigned char *choose_kernel_location(unsigned char *input,
 #ifdef CONFIG_HIBERNATION
 	if (!cmdline_find_option_bool("kaslr")) {
 		debug_putstr("KASLR disabled by default...\n");
+		add_kaslr_setup_data(params, 0);
 		goto out;
 	}
 #else
 	if (cmdline_find_option_bool("nokaslr")) {
 		debug_putstr("KASLR disabled by cmdline...\n");
+		add_kaslr_setup_data(params, 0);
 		goto out;
 	}
 #endif
+	add_kaslr_setup_data(params, 1);
 
 	/* Record the various known unsafe memory ranges. */
 	mem_avoid_init((unsigned long)input, input_size,
diff --git a/arch/x86/boot/compressed/efi_stub_64.S b/arch/x86/boot/compressed/efi_stub_64.S
index 7ff3632806b1..99494dff2113 100644
--- a/arch/x86/boot/compressed/efi_stub_64.S
+++ b/arch/x86/boot/compressed/efi_stub_64.S
@@ -3,28 +3,3 @@
 #include <asm/processor-flags.h>
 
 #include "../../platform/efi/efi_stub_64.S"
-
-#ifdef CONFIG_EFI_MIXED
-	.code64
-	.text
-ENTRY(efi64_thunk)
-	push	%rbp
-	push	%rbx
-
-	subq	$16, %rsp
-	leaq	efi_exit32(%rip), %rax
-	movl	%eax, 8(%rsp)
-	leaq	efi_gdt64(%rip), %rax
-	movl	%eax, 4(%rsp)
-	movl	%eax, 2(%rax)		/* Fixup the gdt base address */
-	leaq	efi32_boot_gdt(%rip), %rax
-	movl	%eax, (%rsp)
-
-	call	__efi64_thunk
-
-	addq	$16, %rsp
-	pop	%rbx
-	pop	%rbp
-	ret
-ENDPROC(efi64_thunk)
-#endif /* CONFIG_EFI_MIXED */
diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S
new file mode 100644
index 000000000000..630384a4c14a
--- /dev/null
+++ b/arch/x86/boot/compressed/efi_thunk_64.S
@@ -0,0 +1,196 @@
+/*
+ * Copyright (C) 2014, 2015 Intel Corporation; author Matt Fleming
+ *
+ * Early support for invoking 32-bit EFI services from a 64-bit kernel.
+ *
+ * Because this thunking occurs before ExitBootServices() we have to
+ * restore the firmware's 32-bit GDT before we make EFI serivce calls,
+ * since the firmware's 32-bit IDT is still currently installed and it
+ * needs to be able to service interrupts.
+ *
+ * On the plus side, we don't have to worry about mangling 64-bit
+ * addresses into 32-bits because we're executing with an identify
+ * mapped pagetable and haven't transitioned to 64-bit virtual addresses
+ * yet.
+ */
+
+#include <linux/linkage.h>
+#include <asm/msr.h>
+#include <asm/page_types.h>
+#include <asm/processor-flags.h>
+#include <asm/segment.h>
+
+	.code64
+	.text
+ENTRY(efi64_thunk)
+	push	%rbp
+	push	%rbx
+
+	subq	$8, %rsp
+	leaq	efi_exit32(%rip), %rax
+	movl	%eax, 4(%rsp)
+	leaq	efi_gdt64(%rip), %rax
+	movl	%eax, (%rsp)
+	movl	%eax, 2(%rax)		/* Fixup the gdt base address */
+
+	movl	%ds, %eax
+	push	%rax
+	movl	%es, %eax
+	push	%rax
+	movl	%ss, %eax
+	push	%rax
+
+	/*
+	 * Convert x86-64 ABI params to i386 ABI
+	 */
+	subq	$32, %rsp
+	movl	%esi, 0x0(%rsp)
+	movl	%edx, 0x4(%rsp)
+	movl	%ecx, 0x8(%rsp)
+	movq	%r8, %rsi
+	movl	%esi, 0xc(%rsp)
+	movq	%r9, %rsi
+	movl	%esi,  0x10(%rsp)
+
+	sgdt	save_gdt(%rip)
+
+	leaq	1f(%rip), %rbx
+	movq	%rbx, func_rt_ptr(%rip)
+
+	/*
+	 * Switch to gdt with 32-bit segments. This is the firmware GDT
+	 * that was installed when the kernel started executing. This
+	 * pointer was saved at the EFI stub entry point in head_64.S.
+	 */
+	leaq	efi32_boot_gdt(%rip), %rax
+	lgdt	(%rax)
+
+	pushq	$__KERNEL_CS
+	leaq	efi_enter32(%rip), %rax
+	pushq	%rax
+	lretq
+
+1:	addq	$32, %rsp
+
+	lgdt	save_gdt(%rip)
+
+	pop	%rbx
+	movl	%ebx, %ss
+	pop	%rbx
+	movl	%ebx, %es
+	pop	%rbx
+	movl	%ebx, %ds
+
+	/*
+	 * Convert 32-bit status code into 64-bit.
+	 */
+	test	%rax, %rax
+	jz	1f
+	movl	%eax, %ecx
+	andl	$0x0fffffff, %ecx
+	andl	$0xf0000000, %eax
+	shl	$32, %rax
+	or	%rcx, %rax
+1:
+	addq	$8, %rsp
+	pop	%rbx
+	pop	%rbp
+	ret
+ENDPROC(efi64_thunk)
+
+ENTRY(efi_exit32)
+	movq	func_rt_ptr(%rip), %rax
+	push	%rax
+	mov	%rdi, %rax
+	ret
+ENDPROC(efi_exit32)
+
+	.code32
+/*
+ * EFI service pointer must be in %edi.
+ *
+ * The stack should represent the 32-bit calling convention.
+ */
+ENTRY(efi_enter32)
+	movl	$__KERNEL_DS, %eax
+	movl	%eax, %ds
+	movl	%eax, %es
+	movl	%eax, %ss
+
+	/* Reload pgtables */
+	movl	%cr3, %eax
+	movl	%eax, %cr3
+
+	/* Disable paging */
+	movl	%cr0, %eax
+	btrl	$X86_CR0_PG_BIT, %eax
+	movl	%eax, %cr0
+
+	/* Disable long mode via EFER */
+	movl	$MSR_EFER, %ecx
+	rdmsr
+	btrl	$_EFER_LME, %eax
+	wrmsr
+
+	call	*%edi
+
+	/* We must preserve return value */
+	movl	%eax, %edi
+
+	/*
+	 * Some firmware will return with interrupts enabled. Be sure to
+	 * disable them before we switch GDTs.
+	 */
+	cli
+
+	movl	56(%esp), %eax
+	movl	%eax, 2(%eax)
+	lgdtl	(%eax)
+
+	movl	%cr4, %eax
+	btsl	$(X86_CR4_PAE_BIT), %eax
+	movl	%eax, %cr4
+
+	movl	%cr3, %eax
+	movl	%eax, %cr3
+
+	movl	$MSR_EFER, %ecx
+	rdmsr
+	btsl	$_EFER_LME, %eax
+	wrmsr
+
+	xorl	%eax, %eax
+	lldt	%ax
+
+	movl	60(%esp), %eax
+	pushl	$__KERNEL_CS
+	pushl	%eax
+
+	/* Enable paging */
+	movl	%cr0, %eax
+	btsl	$X86_CR0_PG_BIT, %eax
+	movl	%eax, %cr0
+	lret
+ENDPROC(efi_enter32)
+
+	.data
+	.balign	8
+	.global	efi32_boot_gdt
+efi32_boot_gdt:	.word	0
+		.quad	0
+
+save_gdt:	.word	0
+		.quad	0
+func_rt_ptr:	.quad	0
+
+	.global efi_gdt64
+efi_gdt64:
+	.word	efi_gdt64_end - efi_gdt64
+	.long	0			/* Filled out by user */
+	.word	0
+	.quad	0x0000000000000000	/* NULL descriptor */
+	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
+	.quad	0x00cf92000000ffff	/* __KERNEL_DS */
+	.quad	0x0080890000000000	/* TS descriptor */
+	.quad   0x0000000000000000	/* TS continued */
+efi_gdt64_end:
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index a950864a64da..5903089c818f 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -401,7 +401,8 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
 	 * the entire decompressed kernel plus relocation table, or the
 	 * entire decompressed kernel plus .bss and .brk sections.
 	 */
-	output = choose_kernel_location(input_data, input_len, output,
+	output = choose_kernel_location(real_mode, input_data, input_len,
+					output,
 					output_len > run_size ? output_len
 							      : run_size);
 
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 04477d68403f..ee3576b2666b 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -57,7 +57,8 @@ int cmdline_find_option_bool(const char *option);
 
 #if CONFIG_RANDOMIZE_BASE
 /* aslr.c */
-unsigned char *choose_kernel_location(unsigned char *input,
+unsigned char *choose_kernel_location(struct boot_params *params,
+				      unsigned char *input,
 				      unsigned long input_size,
 				      unsigned char *output,
 				      unsigned long output_size);
@@ -65,7 +66,8 @@ unsigned char *choose_kernel_location(unsigned char *input,
 bool has_cpuflag(int flag);
 #else
 static inline
-unsigned char *choose_kernel_location(unsigned char *input,
+unsigned char *choose_kernel_location(struct boot_params *params,
+				      unsigned char *input,
 				      unsigned long input_size,
 				      unsigned char *output,
 				      unsigned long output_size)
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 92003f3c8a42..efc3b22d896e 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -213,7 +213,15 @@ void register_lapic_address(unsigned long address);
 extern void setup_boot_APIC_clock(void);
 extern void setup_secondary_APIC_clock(void);
 extern int APIC_init_uniprocessor(void);
+
+#ifdef CONFIG_X86_64
+static inline int apic_force_enable(unsigned long addr)
+{
+	return -1;
+}
+#else
 extern int apic_force_enable(unsigned long addr);
+#endif
 
 extern int apic_bsp_setup(bool upmode);
 extern void apic_ap_setup(void);
diff --git a/arch/x86/include/asm/imr.h b/arch/x86/include/asm/imr.h
new file mode 100644
index 000000000000..cd2ce4068441
--- /dev/null
+++ b/arch/x86/include/asm/imr.h
@@ -0,0 +1,60 @@
+/*
+ * imr.h: Isolated Memory Region API
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#ifndef _IMR_H
+#define _IMR_H
+
+#include <linux/types.h>
+
+/*
+ * IMR agent access mask bits
+ * See section 12.7.4.7 from quark-x1000-datasheet.pdf for register
+ * definitions.
+ */
+#define IMR_ESRAM_FLUSH		BIT(31)
+#define IMR_CPU_SNOOP		BIT(30)		/* Applicable only to write */
+#define IMR_RMU			BIT(29)
+#define IMR_VC1_SAI_ID3		BIT(15)
+#define IMR_VC1_SAI_ID2		BIT(14)
+#define IMR_VC1_SAI_ID1		BIT(13)
+#define IMR_VC1_SAI_ID0		BIT(12)
+#define IMR_VC0_SAI_ID3		BIT(11)
+#define IMR_VC0_SAI_ID2		BIT(10)
+#define IMR_VC0_SAI_ID1		BIT(9)
+#define IMR_VC0_SAI_ID0		BIT(8)
+#define IMR_CPU_0		BIT(1)		/* SMM mode */
+#define IMR_CPU			BIT(0)		/* Non SMM mode */
+#define IMR_ACCESS_NONE		0
+
+/*
+ * Read/Write access-all bits here include some reserved bits
+ * These are the values firmware uses and are accepted by hardware.
+ * The kernel defines read/write access-all in the same way as firmware
+ * in order to have a consistent and crisp definition across firmware,
+ * bootloader and kernel.
+ */
+#define IMR_READ_ACCESS_ALL	0xBFFFFFFF
+#define IMR_WRITE_ACCESS_ALL	0xFFFFFFFF
+
+/* Number of IMRs provided by Quark X1000 SoC */
+#define QUARK_X1000_IMR_MAX	0x08
+#define QUARK_X1000_IMR_REGBASE 0x40
+
+/* IMR alignment bits - only bits 31:10 are checked for IMR validity */
+#define IMR_ALIGN		0x400
+#define IMR_MASK		(IMR_ALIGN - 1)
+
+int imr_add_range(phys_addr_t base, size_t size,
+		  unsigned int rmask, unsigned int wmask, bool lock);
+
+int imr_remove_range(phys_addr_t base, size_t size);
+
+#endif /* _IMR_H */
diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h
index 879fd7d33877..ef01fef3eebc 100644
--- a/arch/x86/include/asm/lguest_hcall.h
+++ b/arch/x86/include/asm/lguest_hcall.h
@@ -16,7 +16,6 @@
 #define LHCALL_SET_PTE		14
 #define LHCALL_SET_PGD		15
 #define LHCALL_LOAD_TLS		16
-#define LHCALL_NOTIFY		17
 #define LHCALL_LOAD_GDT_ENTRY	18
 #define LHCALL_SEND_INTERRUPTS	19
 
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index f97fbe3abb67..95e11f79f123 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -51,6 +51,8 @@ extern int devmem_is_allowed(unsigned long pagenr);
 extern unsigned long max_low_pfn_mapped;
 extern unsigned long max_pfn_mapped;
 
+extern bool kaslr_enabled;
+
 static inline phys_addr_t get_max_mapped(void)
 {
 	return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 67fc3d2b0aab..a0c35bf6cb92 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -476,12 +476,14 @@ static inline int pmd_present(pmd_t pmd)
  */
 static inline int pte_protnone(pte_t pte)
 {
-	return pte_flags(pte) & _PAGE_PROTNONE;
+	return (pte_flags(pte) & (_PAGE_PROTNONE | _PAGE_PRESENT))
+		== _PAGE_PROTNONE;
 }
 
 static inline int pmd_protnone(pmd_t pmd)
 {
-	return pmd_flags(pmd) & _PAGE_PROTNONE;
+	return (pmd_flags(pmd) & (_PAGE_PROTNONE | _PAGE_PRESENT))
+		== _PAGE_PROTNONE;
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 7050d864f520..cf87de3fc390 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -46,7 +46,7 @@ static __always_inline bool static_key_false(struct static_key *key);
 
 static inline void __ticket_enter_slowpath(arch_spinlock_t *lock)
 {
-	set_bit(0, (volatile unsigned long *)&lock->tickets.tail);
+	set_bit(0, (volatile unsigned long *)&lock->tickets.head);
 }
 
 #else  /* !CONFIG_PARAVIRT_SPINLOCKS */
@@ -60,10 +60,30 @@ static inline void __ticket_unlock_kick(arch_spinlock_t *lock,
 }
 
 #endif /* CONFIG_PARAVIRT_SPINLOCKS */
+static inline int  __tickets_equal(__ticket_t one, __ticket_t two)
+{
+	return !((one ^ two) & ~TICKET_SLOWPATH_FLAG);
+}
+
+static inline void __ticket_check_and_clear_slowpath(arch_spinlock_t *lock,
+							__ticket_t head)
+{
+	if (head & TICKET_SLOWPATH_FLAG) {
+		arch_spinlock_t old, new;
+
+		old.tickets.head = head;
+		new.tickets.head = head & ~TICKET_SLOWPATH_FLAG;
+		old.tickets.tail = new.tickets.head + TICKET_LOCK_INC;
+		new.tickets.tail = old.tickets.tail;
+
+		/* try to clear slowpath flag when there are no contenders */
+		cmpxchg(&lock->head_tail, old.head_tail, new.head_tail);
+	}
+}
 
 static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 {
-	return lock.tickets.head == lock.tickets.tail;
+	return __tickets_equal(lock.tickets.head, lock.tickets.tail);
 }
 
 /*
@@ -87,18 +107,21 @@ static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
 	if (likely(inc.head == inc.tail))
 		goto out;
 
-	inc.tail &= ~TICKET_SLOWPATH_FLAG;
 	for (;;) {
 		unsigned count = SPIN_THRESHOLD;
 
 		do {
-			if (READ_ONCE(lock->tickets.head) == inc.tail)
-				goto out;
+			inc.head = READ_ONCE(lock->tickets.head);
+			if (__tickets_equal(inc.head, inc.tail))
+				goto clear_slowpath;
 			cpu_relax();
 		} while (--count);
 		__ticket_lock_spinning(lock, inc.tail);
 	}
-out:	barrier();	/* make sure nothing creeps before the lock is taken */
+clear_slowpath:
+	__ticket_check_and_clear_slowpath(lock, inc.head);
+out:
+	barrier();	/* make sure nothing creeps before the lock is taken */
 }
 
 static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
@@ -106,56 +129,30 @@ static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
 	arch_spinlock_t old, new;
 
 	old.tickets = READ_ONCE(lock->tickets);
-	if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG))
+	if (!__tickets_equal(old.tickets.head, old.tickets.tail))
 		return 0;
 
 	new.head_tail = old.head_tail + (TICKET_LOCK_INC << TICKET_SHIFT);
+	new.head_tail &= ~TICKET_SLOWPATH_FLAG;
 
 	/* cmpxchg is a full barrier, so nothing can move before it */
 	return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
 }
 
-static inline void __ticket_unlock_slowpath(arch_spinlock_t *lock,
-					    arch_spinlock_t old)
-{
-	arch_spinlock_t new;
-
-	BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS);
-
-	/* Perform the unlock on the "before" copy */
-	old.tickets.head += TICKET_LOCK_INC;
-
-	/* Clear the slowpath flag */
-	new.head_tail = old.head_tail & ~(TICKET_SLOWPATH_FLAG << TICKET_SHIFT);
-
-	/*
-	 * If the lock is uncontended, clear the flag - use cmpxchg in
-	 * case it changes behind our back though.
-	 */
-	if (new.tickets.head != new.tickets.tail ||
-	    cmpxchg(&lock->head_tail, old.head_tail,
-					new.head_tail) != old.head_tail) {
-		/*
-		 * Lock still has someone queued for it, so wake up an
-		 * appropriate waiter.
-		 */
-		__ticket_unlock_kick(lock, old.tickets.head);
-	}
-}
-
 static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
 	if (TICKET_SLOWPATH_FLAG &&
-	    static_key_false(&paravirt_ticketlocks_enabled)) {
-		arch_spinlock_t prev;
+		static_key_false(&paravirt_ticketlocks_enabled)) {
+		__ticket_t head;
 
-		prev = *lock;
-		add_smp(&lock->tickets.head, TICKET_LOCK_INC);
+		BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS);
 
-		/* add_smp() is a full mb() */
+		head = xadd(&lock->tickets.head, TICKET_LOCK_INC);
 
-		if (unlikely(lock->tickets.tail & TICKET_SLOWPATH_FLAG))
-			__ticket_unlock_slowpath(lock, prev);
+		if (unlikely(head & TICKET_SLOWPATH_FLAG)) {
+			head &= ~TICKET_SLOWPATH_FLAG;
+			__ticket_unlock_kick(lock, (head + TICKET_LOCK_INC));
+		}
 	} else
 		__add(&lock->tickets.head, TICKET_LOCK_INC, UNLOCK_LOCK_PREFIX);
 }
@@ -164,14 +161,15 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
 	struct __raw_tickets tmp = READ_ONCE(lock->tickets);
 
-	return tmp.tail != tmp.head;
+	return !__tickets_equal(tmp.tail, tmp.head);
 }
 
 static inline int arch_spin_is_contended(arch_spinlock_t *lock)
 {
 	struct __raw_tickets tmp = READ_ONCE(lock->tickets);
 
-	return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC;
+	tmp.head &= ~TICKET_SLOWPATH_FLAG;
+	return (tmp.tail - tmp.head) > TICKET_LOCK_INC;
 }
 #define arch_spin_is_contended	arch_spin_is_contended
 
@@ -191,8 +189,8 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
 		 * We need to check "unlocked" in a loop, tmp.head == head
 		 * can be false positive because of overflow.
 		 */
-		if (tmp.head == (tmp.tail & ~TICKET_SLOWPATH_FLAG) ||
-		    tmp.head != head)
+		if (__tickets_equal(tmp.head, tmp.tail) ||
+				!__tickets_equal(tmp.head, head))
 			break;
 
 		cpu_relax();
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 0d592e0a5b84..ace9dec050b1 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -179,7 +179,7 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
 	asm volatile("call __get_user_%P3"				\
 		     : "=a" (__ret_gu), "=r" (__val_gu)			\
 		     : "0" (ptr), "i" (sizeof(*(ptr))));		\
-	(x) = (__typeof__(*(ptr))) __val_gu;				\
+	(x) = (__force __typeof__(*(ptr))) __val_gu;			\
 	__ret_gu;							\
 })
 
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 225b0988043a..44e6dd7e36a2 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -7,6 +7,7 @@
 #define SETUP_DTB			2
 #define SETUP_PCI			3
 #define SETUP_EFI			4
+#define SETUP_KASLR			5
 
 /* ram_size flags */
 #define RAMDISK_IMAGE_START_MASK	0x07FF
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index ae97ed0873c6..3d525c6124f6 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -613,6 +613,11 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp)
 {
 	int rc, irq, trigger, polarity;
 
+	if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
+		*irqp = gsi;
+		return 0;
+	}
+
 	rc = acpi_get_override_irq(gsi, &trigger, &polarity);
 	if (rc == 0) {
 		trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE;
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b5c8ff5e9dfc..2346c95c6ab1 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1396,6 +1396,12 @@ void cpu_init(void)
 
 	wait_for_master_cpu(cpu);
 
+	/*
+	 * Initialize the CR4 shadow before doing anything that could
+	 * try to read it.
+	 */
+	cr4_init_shadow();
+
 	show_ucode_info_early();
 
 	printk(KERN_INFO "Initializing CPU#%d\n", cpu);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 94d7dcb12145..50163fa9034f 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -565,8 +565,8 @@ static const struct _tlb_table intel_tlb_table[] = {
 	{ 0xb2, TLB_INST_4K,		64,	" TLB_INST 4KByte pages, 4-way set associative" },
 	{ 0xb3, TLB_DATA_4K,		128,	" TLB_DATA 4 KByte pages, 4-way set associative" },
 	{ 0xb4, TLB_DATA_4K,		256,	" TLB_DATA 4 KByte pages, 4-way associative" },
-	{ 0xb5, TLB_INST_4K,		64,	" TLB_INST 4 KByte pages, 8-way set ssociative" },
-	{ 0xb6, TLB_INST_4K,		128,	" TLB_INST 4 KByte pages, 8-way set ssociative" },
+	{ 0xb5, TLB_INST_4K,		64,	" TLB_INST 4 KByte pages, 8-way set associative" },
+	{ 0xb6, TLB_INST_4K,		128,	" TLB_INST 4 KByte pages, 8-way set associative" },
 	{ 0xba, TLB_DATA_4K,		64,	" TLB_DATA 4 KByte pages, 4-way associative" },
 	{ 0xc0, TLB_DATA_4K_4M,		8,	" TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" },
 	{ 0xc1, STLB_4K_2M,		1024,	" STLB 4 KByte and 2 MByte pages, 8-way associative" },
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index c6826d1e8082..746e7fd08aad 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -196,6 +196,11 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
 		struct microcode_header_intel mc_header;
 		unsigned int mc_size;
 
+		if (leftover < sizeof(mc_header)) {
+			pr_err("error! Truncated header in microcode data file\n");
+			break;
+		}
+
 		if (get_ucode_data(&mc_header, ucode_ptr, sizeof(mc_header)))
 			break;
 
diff --git a/arch/x86/kernel/cpu/microcode/intel_early.c b/arch/x86/kernel/cpu/microcode/intel_early.c
index ec9df6f9cd47..420eb933189c 100644
--- a/arch/x86/kernel/cpu/microcode/intel_early.c
+++ b/arch/x86/kernel/cpu/microcode/intel_early.c
@@ -321,7 +321,11 @@ get_matching_model_microcode(int cpu, unsigned long start,
 	unsigned int mc_saved_count = mc_saved_data->mc_saved_count;
 	int i;
 
-	while (leftover) {
+	while (leftover && mc_saved_count < ARRAY_SIZE(mc_saved_tmp)) {
+
+		if (leftover < sizeof(mc_header))
+			break;
+
 		mc_header = (struct microcode_header_intel *)ucode_ptr;
 
 		mc_size = get_totalsize(mc_header);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 000d4199b03e..31e2d5bf3e38 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -982,6 +982,9 @@ ENTRY(xen_hypervisor_callback)
 ENTRY(xen_do_upcall)
 1:	mov %esp, %eax
 	call xen_evtchn_do_upcall
+#ifndef CONFIG_PREEMPT
+	call xen_maybe_preempt_hcall
+#endif
 	jmp  ret_from_intr
 	CFI_ENDPROC
 ENDPROC(xen_hypervisor_callback)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index db13655c3a2a..10074ad9ebf8 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1208,6 +1208,9 @@ ENTRY(xen_do_hypervisor_callback)   # do_hypervisor_callback(struct *pt_regs)
 	popq %rsp
 	CFI_DEF_CFA_REGISTER rsp
 	decl PER_CPU_VAR(irq_count)
+#ifndef CONFIG_PREEMPT
+	call xen_maybe_preempt_hcall
+#endif
 	jmp  error_exit
 	CFI_ENDPROC
 END(xen_do_hypervisor_callback)
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 705ef8d48e2d..67b1cbe0093a 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -302,6 +302,9 @@ int check_irq_vectors_for_cpu_disable(void)
 		irq = __this_cpu_read(vector_irq[vector]);
 		if (irq >= 0) {
 			desc = irq_to_desc(irq);
+			if (!desc)
+				continue;
+
 			data = irq_desc_get_irq_data(desc);
 			cpumask_copy(&affinity_new, data->affinity);
 			cpu_clear(this_cpu, affinity_new);
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 98f654d466e5..4e3d5a9621fe 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -84,7 +84,7 @@ static volatile u32 twobyte_is_boostable[256 / 32] = {
 	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
 	/*      ----------------------------------------------          */
 	W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */
-	W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 10 */
+	W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1) , /* 10 */
 	W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 20 */
 	W(0x30, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
 	W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
@@ -223,27 +223,48 @@ static unsigned long
 __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr)
 {
 	struct kprobe *kp;
+	unsigned long faddr;
 
 	kp = get_kprobe((void *)addr);
-	/* There is no probe, return original address */
-	if (!kp)
+	faddr = ftrace_location(addr);
+	/*
+	 * Addresses inside the ftrace location are refused by
+	 * arch_check_ftrace_location(). Something went terribly wrong
+	 * if such an address is checked here.
+	 */
+	if (WARN_ON(faddr && faddr != addr))
+		return 0UL;
+	/*
+	 * Use the current code if it is not modified by Kprobe
+	 * and it cannot be modified by ftrace.
+	 */
+	if (!kp && !faddr)
 		return addr;
 
 	/*
-	 *  Basically, kp->ainsn.insn has an original instruction.
-	 *  However, RIP-relative instruction can not do single-stepping
-	 *  at different place, __copy_instruction() tweaks the displacement of
-	 *  that instruction. In that case, we can't recover the instruction
-	 *  from the kp->ainsn.insn.
+	 * Basically, kp->ainsn.insn has an original instruction.
+	 * However, RIP-relative instruction can not do single-stepping
+	 * at different place, __copy_instruction() tweaks the displacement of
+	 * that instruction. In that case, we can't recover the instruction
+	 * from the kp->ainsn.insn.
 	 *
-	 *  On the other hand, kp->opcode has a copy of the first byte of
-	 *  the probed instruction, which is overwritten by int3. And
-	 *  the instruction at kp->addr is not modified by kprobes except
-	 *  for the first byte, we can recover the original instruction
-	 *  from it and kp->opcode.
+	 * On the other hand, in case on normal Kprobe, kp->opcode has a copy
+	 * of the first byte of the probed instruction, which is overwritten
+	 * by int3. And the instruction at kp->addr is not modified by kprobes
+	 * except for the first byte, we can recover the original instruction
+	 * from it and kp->opcode.
+	 *
+	 * In case of Kprobes using ftrace, we do not have a copy of
+	 * the original instruction. In fact, the ftrace location might
+	 * be modified at anytime and even could be in an inconsistent state.
+	 * Fortunately, we know that the original code is the ideal 5-byte
+	 * long NOP.
 	 */
-	memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
-	buf[0] = kp->opcode;
+	memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+	if (faddr)
+		memcpy(buf, ideal_nops[NOP_ATOMIC5], 5);
+	else
+		buf[0] = kp->opcode;
 	return (unsigned long)buf;
 }
 
@@ -251,6 +272,7 @@ __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr)
  * Recover the probed instruction at addr for further analysis.
  * Caller must lock kprobes by kprobe_mutex, or disable preemption
  * for preventing to release referencing kprobes.
+ * Returns zero if the instruction can not get recovered.
  */
 unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
 {
@@ -285,6 +307,8 @@ static int can_probe(unsigned long paddr)
 		 * normally used, we just go through if there is no kprobe.
 		 */
 		__addr = recover_probed_instruction(buf, addr);
+		if (!__addr)
+			return 0;
 		kernel_insn_init(&insn, (void *)__addr, MAX_INSN_SIZE);
 		insn_get_length(&insn);
 
@@ -333,6 +357,8 @@ int __copy_instruction(u8 *dest, u8 *src)
 	unsigned long recovered_insn =
 		recover_probed_instruction(buf, (unsigned long)src);
 
+	if (!recovered_insn)
+		return 0;
 	kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
 	insn_get_length(&insn);
 	/* Another subsystem puts a breakpoint, failed to recover */
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 0dd8d089c315..7b3b9d15c47a 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -259,6 +259,8 @@ static int can_optimize(unsigned long paddr)
 			 */
 			return 0;
 		recovered_insn = recover_probed_instruction(buf, addr);
+		if (!recovered_insn)
+			return 0;
 		kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
 		insn_get_length(&insn);
 		/* Another subsystem puts a breakpoint */
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 94f643484300..e354cc6446ab 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -609,7 +609,7 @@ static inline void check_zero(void)
 	u8 ret;
 	u8 old;
 
-	old = ACCESS_ONCE(zero_stats);
+	old = READ_ONCE(zero_stats);
 	if (unlikely(old)) {
 		ret = cmpxchg(&zero_stats, old, 0);
 		/* This ensures only one fellow resets the stat */
@@ -727,6 +727,7 @@ __visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
 	int cpu;
 	u64 start;
 	unsigned long flags;
+	__ticket_t head;
 
 	if (in_nmi())
 		return;
@@ -768,11 +769,15 @@ __visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
 	 */
 	__ticket_enter_slowpath(lock);
 
+	/* make sure enter_slowpath, which is atomic does not cross the read */
+	smp_mb__after_atomic();
+
 	/*
 	 * check again make sure it didn't become free while
 	 * we weren't looking.
 	 */
-	if (ACCESS_ONCE(lock->tickets.head) == want) {
+	head = READ_ONCE(lock->tickets.head);
+	if (__tickets_equal(head, want)) {
 		add_stats(TAKEN_SLOW_PICKUP, 1);
 		goto out;
 	}
@@ -803,8 +808,8 @@ static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
 	add_stats(RELEASED_SLOW, 1);
 	for_each_cpu(cpu, &waiting_cpus) {
 		const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu);
-		if (ACCESS_ONCE(w->lock) == lock &&
-		    ACCESS_ONCE(w->want) == ticket) {
+		if (READ_ONCE(w->lock) == lock &&
+		    READ_ONCE(w->want) == ticket) {
 			add_stats(RELEASED_SLOW_KICKED, 1);
 			kvm_kick_cpu(cpu);
 			break;
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index d1ac80b72c72..9bbb9b35c144 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -47,21 +47,13 @@ do {							\
 
 #ifdef CONFIG_RANDOMIZE_BASE
 static unsigned long module_load_offset;
-static int randomize_modules = 1;
 
 /* Mutex protects the module_load_offset. */
 static DEFINE_MUTEX(module_kaslr_mutex);
 
-static int __init parse_nokaslr(char *p)
-{
-	randomize_modules = 0;
-	return 0;
-}
-early_param("nokaslr", parse_nokaslr);
-
 static unsigned long int get_module_load_offset(void)
 {
-	if (randomize_modules) {
+	if (kaslr_enabled) {
 		mutex_lock(&module_kaslr_mutex);
 		/*
 		 * Calculate the module_load_offset the first time this
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 0a2421cca01f..98dc9317286e 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -122,6 +122,8 @@
 unsigned long max_low_pfn_mapped;
 unsigned long max_pfn_mapped;
 
+bool __read_mostly kaslr_enabled = false;
+
 #ifdef CONFIG_DMI
 RESERVE_BRK(dmi_alloc, 65536);
 #endif
@@ -425,6 +427,11 @@ static void __init reserve_initrd(void)
 }
 #endif /* CONFIG_BLK_DEV_INITRD */
 
+static void __init parse_kaslr_setup(u64 pa_data, u32 data_len)
+{
+	kaslr_enabled = (bool)(pa_data + sizeof(struct setup_data));
+}
+
 static void __init parse_setup_data(void)
 {
 	struct setup_data *data;
@@ -450,6 +457,9 @@ static void __init parse_setup_data(void)
 		case SETUP_EFI:
 			parse_efi_setup(pa_data, data_len);
 			break;
+		case SETUP_KASLR:
+			parse_kaslr_setup(pa_data, data_len);
+			break;
 		default:
 			break;
 		}
@@ -832,10 +842,14 @@ static void __init trim_low_memory_range(void)
 static int
 dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
 {
-	pr_emerg("Kernel Offset: 0x%lx from 0x%lx "
-		 "(relocation range: 0x%lx-0x%lx)\n",
-		 (unsigned long)&_text - __START_KERNEL, __START_KERNEL,
-		 __START_KERNEL_map, MODULES_VADDR-1);
+	if (kaslr_enabled)
+		pr_emerg("Kernel Offset: 0x%lx from 0x%lx (relocation range: 0x%lx-0x%lx)\n",
+			 (unsigned long)&_text - __START_KERNEL,
+			 __START_KERNEL,
+			 __START_KERNEL_map,
+			 MODULES_VADDR-1);
+	else
+		pr_emerg("Kernel Offset: disabled\n");
 
 	return 0;
 }
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 8b96a947021f..81f8adb0679e 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -66,27 +66,54 @@
  * Good-instruction tables for 32-bit apps.  This is non-const and volatile
  * to keep gcc from statically optimizing it out, as variable_test_bit makes
  * some versions of gcc to think only *(unsigned long*) is used.
+ *
+ * Opcodes we'll probably never support:
+ * 6c-6f - ins,outs. SEGVs if used in userspace
+ * e4-e7 - in,out imm. SEGVs if used in userspace
+ * ec-ef - in,out acc. SEGVs if used in userspace
+ * cc - int3. SIGTRAP if used in userspace
+ * ce - into. Not used in userspace - no kernel support to make it useful. SEGVs
+ *	(why we support bound (62) then? it's similar, and similarly unused...)
+ * f1 - int1. SIGTRAP if used in userspace
+ * f4 - hlt. SEGVs if used in userspace
+ * fa - cli. SEGVs if used in userspace
+ * fb - sti. SEGVs if used in userspace
+ *
+ * Opcodes which need some work to be supported:
+ * 07,17,1f - pop es/ss/ds
+ *	Normally not used in userspace, but would execute if used.
+ *	Can cause GP or stack exception if tries to load wrong segment descriptor.
+ *	We hesitate to run them under single step since kernel's handling
+ *	of userspace single-stepping (TF flag) is fragile.
+ *	We can easily refuse to support push es/cs/ss/ds (06/0e/16/1e)
+ *	on the same grounds that they are never used.
+ * cd - int N.
+ *	Used by userspace for "int 80" syscall entry. (Other "int N"
+ *	cause GP -> SEGV since their IDT gates don't allow calls from CPL 3).
+ *	Not supported since kernel's handling of userspace single-stepping
+ *	(TF flag) is fragile.
+ * cf - iret. Normally not used in userspace. Doesn't SEGV unless arguments are bad
  */
 #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
 static volatile u32 good_insns_32[256 / 32] = {
 	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
 	/*      ----------------------------------------------         */
-	W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) | /* 00 */
+	W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 00 */
 	W(0x10, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 10 */
-	W(0x20, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) | /* 20 */
-	W(0x30, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) , /* 30 */
+	W(0x20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
+	W(0x30, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 30 */
 	W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
 	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
-	W(0x60, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
+	W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
 	W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
 	W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
 	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
 	W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
 	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
 	W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
-	W(0xd0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
+	W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
 	W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */
-	W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1)   /* f0 */
+	W(0xf0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1)   /* f0 */
 	/*      ----------------------------------------------         */
 	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
 };
@@ -94,27 +121,61 @@ static volatile u32 good_insns_32[256 / 32] = {
 #define good_insns_32	NULL
 #endif
 
-/* Good-instruction tables for 64-bit apps */
+/* Good-instruction tables for 64-bit apps.
+ *
+ * Genuinely invalid opcodes:
+ * 06,07 - formerly push/pop es
+ * 0e - formerly push cs
+ * 16,17 - formerly push/pop ss
+ * 1e,1f - formerly push/pop ds
+ * 27,2f,37,3f - formerly daa/das/aaa/aas
+ * 60,61 - formerly pusha/popa
+ * 62 - formerly bound. EVEX prefix for AVX512 (not yet supported)
+ * 82 - formerly redundant encoding of Group1
+ * 9a - formerly call seg:ofs
+ * ce - formerly into
+ * d4,d5 - formerly aam/aad
+ * d6 - formerly undocumented salc
+ * ea - formerly jmp seg:ofs
+ *
+ * Opcodes we'll probably never support:
+ * 6c-6f - ins,outs. SEGVs if used in userspace
+ * e4-e7 - in,out imm. SEGVs if used in userspace
+ * ec-ef - in,out acc. SEGVs if used in userspace
+ * cc - int3. SIGTRAP if used in userspace
+ * f1 - int1. SIGTRAP if used in userspace
+ * f4 - hlt. SEGVs if used in userspace
+ * fa - cli. SEGVs if used in userspace
+ * fb - sti. SEGVs if used in userspace
+ *
+ * Opcodes which need some work to be supported:
+ * cd - int N.
+ *	Used by userspace for "int 80" syscall entry. (Other "int N"
+ *	cause GP -> SEGV since their IDT gates don't allow calls from CPL 3).
+ *	Not supported since kernel's handling of userspace single-stepping
+ *	(TF flag) is fragile.
+ * cf - iret. Normally not used in userspace. Doesn't SEGV unless arguments are bad
+ */
 #if defined(CONFIG_X86_64)
 static volatile u32 good_insns_64[256 / 32] = {
 	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
 	/*      ----------------------------------------------         */
-	W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */
+	W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* 00 */
 	W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */
-	W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 20 */
-	W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */
-	W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
+	W(0x20, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) | /* 20 */
+	W(0x30, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 30 */
+	W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
 	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
-	W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
+	W(0x60, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
 	W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
 	W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
-	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
+	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1) , /* 90 */
 	W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
 	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
-	W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
+	W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
 	W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
-	W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */
-	W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1)   /* f0 */
+	W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0) | /* e0 */
+	W(0xf0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1)   /* f0 */
 	/*      ----------------------------------------------         */
 	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
 };
@@ -122,49 +183,55 @@ static volatile u32 good_insns_64[256 / 32] = {
 #define good_insns_64	NULL
 #endif
 
-/* Using this for both 64-bit and 32-bit apps */
+/* Using this for both 64-bit and 32-bit apps.
+ * Opcodes we don't support:
+ * 0f 00 - SLDT/STR/LLDT/LTR/VERR/VERW/-/- group. System insns
+ * 0f 01 - SGDT/SIDT/LGDT/LIDT/SMSW/-/LMSW/INVLPG group.
+ *	Also encodes tons of other system insns if mod=11.
+ *	Some are in fact non-system: xend, xtest, rdtscp, maybe more
+ * 0f 05 - syscall
+ * 0f 06 - clts (CPL0 insn)
+ * 0f 07 - sysret
+ * 0f 08 - invd (CPL0 insn)
+ * 0f 09 - wbinvd (CPL0 insn)
+ * 0f 0b - ud2
+ * 0f 30 - wrmsr (CPL0 insn) (then why rdmsr is allowed, it's also CPL0 insn?)
+ * 0f 34 - sysenter
+ * 0f 35 - sysexit
+ * 0f 37 - getsec
+ * 0f 78 - vmread (Intel VMX. CPL0 insn)
+ * 0f 79 - vmwrite (Intel VMX. CPL0 insn)
+ *	Note: with prefixes, these two opcodes are
+ *	extrq/insertq/AVX512 convert vector ops.
+ * 0f ae - group15: [f]xsave,[f]xrstor,[v]{ld,st}mxcsr,clflush[opt],
+ *	{rd,wr}{fs,gs}base,{s,l,m}fence.
+ *	Why? They are all user-executable.
+ */
 static volatile u32 good_2byte_insns[256 / 32] = {
 	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
 	/*      ----------------------------------------------         */
-	W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */
-	W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
-	W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
-	W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
+	W(0x00, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1) | /* 00 */
+	W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
+	W(0x20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
+	W(0x30, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* 30 */
 	W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
 	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
 	W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */
-	W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
+	W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* 70 */
 	W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
 	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
-	W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
-	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
+	W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
+	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
 	W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
-	W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
+	W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
 	W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */
-	W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)   /* f0 */
+	W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)   /* f0 */
 	/*      ----------------------------------------------         */
 	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
 };
 #undef W
 
 /*
- * opcodes we'll probably never support:
- *
- *  6c-6d, e4-e5, ec-ed - in
- *  6e-6f, e6-e7, ee-ef - out
- *  cc, cd - int3, int
- *  cf - iret
- *  d6 - illegal instruction
- *  f1 - int1/icebp
- *  f4 - hlt
- *  fa, fb - cli, sti
- *  0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2
- *
- * invalid opcodes in 64-bit mode:
- *
- *  06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5
- *  63 - we support this opcode in x86_64 but not in i386.
- *
  * opcodes we may need to refine support for:
  *
  *  0f - 2-byte instructions: For many of these instructions, the validity
diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig
index 4a0890f815c4..08f41caada45 100644
--- a/arch/x86/lguest/Kconfig
+++ b/arch/x86/lguest/Kconfig
@@ -1,6 +1,6 @@
 config LGUEST_GUEST
 	bool "Lguest guest support"
-	depends on X86_32 && PARAVIRT
+	depends on X86_32 && PARAVIRT && PCI
 	select TTY
 	select VIRTUALIZATION
 	select VIRTIO
@@ -8,7 +8,7 @@ config LGUEST_GUEST
 	help
 	  Lguest is a tiny in-kernel hypervisor.  Selecting this will
 	  allow your kernel to boot under lguest.  This option will increase
-	  your kernel size by about 6k.  If in doubt, say N.
+	  your kernel size by about 10k.  If in doubt, say N.
 
 	  If you say Y here, make sure you say Y (or M) to the virtio block
 	  and net drivers which lguest needs.
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index c1c1544b8485..ac4453d8520e 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -56,6 +56,9 @@
 #include <linux/virtio_console.h>
 #include <linux/pm.h>
 #include <linux/export.h>
+#include <linux/pci.h>
+#include <linux/virtio_pci.h>
+#include <asm/acpi.h>
 #include <asm/apic.h>
 #include <asm/lguest.h>
 #include <asm/paravirt.h>
@@ -71,6 +74,8 @@
 #include <asm/stackprotector.h>
 #include <asm/reboot.h>		/* for struct machine_ops */
 #include <asm/kvm_para.h>
+#include <asm/pci_x86.h>
+#include <asm/pci-direct.h>
 
 /*G:010
  * Welcome to the Guest!
@@ -831,6 +836,24 @@ static struct irq_chip lguest_irq_controller = {
 	.irq_unmask	= enable_lguest_irq,
 };
 
+static int lguest_enable_irq(struct pci_dev *dev)
+{
+	u8 line = 0;
+
+	/* We literally use the PCI interrupt line as the irq number. */
+	pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &line);
+	irq_set_chip_and_handler_name(line, &lguest_irq_controller,
+				      handle_level_irq, "level");
+	dev->irq = line;
+	return 0;
+}
+
+/* We don't do hotplug PCI, so this shouldn't be called. */
+static void lguest_disable_irq(struct pci_dev *dev)
+{
+	WARN_ON(1);
+}
+
 /*
  * This sets up the Interrupt Descriptor Table (IDT) entry for each hardware
  * interrupt (except 128, which is used for system calls), and then tells the
@@ -1181,25 +1204,136 @@ static __init char *lguest_memory_setup(void)
 	return "LGUEST";
 }
 
+/* Offset within PCI config space of BAR access capability. */
+static int console_cfg_offset = 0;
+static int console_access_cap;
+
+/* Set up so that we access off in bar0 (on bus 0, device 1, function 0) */
+static void set_cfg_window(u32 cfg_offset, u32 off)
+{
+	write_pci_config_byte(0, 1, 0,
+			      cfg_offset + offsetof(struct virtio_pci_cap, bar),
+			      0);
+	write_pci_config(0, 1, 0,
+			 cfg_offset + offsetof(struct virtio_pci_cap, length),
+			 4);
+	write_pci_config(0, 1, 0,
+			 cfg_offset + offsetof(struct virtio_pci_cap, offset),
+			 off);
+}
+
+static void write_bar_via_cfg(u32 cfg_offset, u32 off, u32 val)
+{
+	/*
+	 * We could set this up once, then leave it; nothing else in the *
+	 * kernel should touch these registers.  But if it went wrong, that
+	 * would be a horrible bug to find.
+	 */
+	set_cfg_window(cfg_offset, off);
+	write_pci_config(0, 1, 0,
+			 cfg_offset + sizeof(struct virtio_pci_cap), val);
+}
+
+static void probe_pci_console(void)
+{
+	u8 cap, common_cap = 0, device_cap = 0;
+	/* Offset within BAR0 */
+	u32 device_offset;
+	u32 device_len;
+
+	/* Avoid recursive printk into here. */
+	console_cfg_offset = -1;
+
+	if (!early_pci_allowed()) {
+		printk(KERN_ERR "lguest: early PCI access not allowed!\n");
+		return;
+	}
+
+	/* We expect a console PCI device at BUS0, slot 1. */
+	if (read_pci_config(0, 1, 0, 0) != 0x10431AF4) {
+		printk(KERN_ERR "lguest: PCI device is %#x!\n",
+		       read_pci_config(0, 1, 0, 0));
+		return;
+	}
+
+	/* Find the capabilities we need (must be in bar0) */
+	cap = read_pci_config_byte(0, 1, 0, PCI_CAPABILITY_LIST);
+	while (cap) {
+		u8 vndr = read_pci_config_byte(0, 1, 0, cap);
+		if (vndr == PCI_CAP_ID_VNDR) {
+			u8 type, bar;
+			u32 offset, length;
+
+			type = read_pci_config_byte(0, 1, 0,
+			    cap + offsetof(struct virtio_pci_cap, cfg_type));
+			bar = read_pci_config_byte(0, 1, 0,
+			    cap + offsetof(struct virtio_pci_cap, bar));
+			offset = read_pci_config(0, 1, 0,
+			    cap + offsetof(struct virtio_pci_cap, offset));
+			length = read_pci_config(0, 1, 0,
+			    cap + offsetof(struct virtio_pci_cap, length));
+
+			switch (type) {
+			case VIRTIO_PCI_CAP_DEVICE_CFG:
+				if (bar == 0) {
+					device_cap = cap;
+					device_offset = offset;
+					device_len = length;
+				}
+				break;
+			case VIRTIO_PCI_CAP_PCI_CFG:
+				console_access_cap = cap;
+				break;
+			}
+		}
+		cap = read_pci_config_byte(0, 1, 0, cap + PCI_CAP_LIST_NEXT);
+	}
+	if (!device_cap || !console_access_cap) {
+		printk(KERN_ERR "lguest: No caps (%u/%u/%u) in console!\n",
+		       common_cap, device_cap, console_access_cap);
+		return;
+	}
+
+	/*
+	 * Note that we can't check features, until we've set the DRIVER
+	 * status bit.  We don't want to do that until we have a real driver,
+	 * so we just check that the device-specific config has room for
+	 * emerg_wr.  If it doesn't support VIRTIO_CONSOLE_F_EMERG_WRITE
+	 * it should ignore the access.
+	 */
+	if (device_len < (offsetof(struct virtio_console_config, emerg_wr)
+			  + sizeof(u32))) {
+		printk(KERN_ERR "lguest: console missing emerg_wr field\n");
+		return;
+	}
+
+	console_cfg_offset = device_offset;
+	printk(KERN_INFO "lguest: Console via virtio-pci emerg_wr\n");
+}
+
 /*
  * We will eventually use the virtio console device to produce console output,
- * but before that is set up we use LHCALL_NOTIFY on normal memory to produce
- * console output.
+ * but before that is set up we use the virtio PCI console's backdoor mmio
+ * access and the "emergency" write facility (which is legal even before the
+ * device is configured).
  */
 static __init int early_put_chars(u32 vtermno, const char *buf, int count)
 {
-	char scratch[17];
-	unsigned int len = count;
+	/* If we couldn't find PCI console, forget it. */
+	if (console_cfg_offset < 0)
+		return count;
 
-	/* We use a nul-terminated string, so we make a copy.  Icky, huh? */
-	if (len > sizeof(scratch) - 1)
-		len = sizeof(scratch) - 1;
-	scratch[len] = '\0';
-	memcpy(scratch, buf, len);
-	hcall(LHCALL_NOTIFY, __pa(scratch), 0, 0, 0);
+	if (unlikely(!console_cfg_offset)) {
+		probe_pci_console();
+		if (console_cfg_offset < 0)
+			return count;
+	}
 
-	/* This routine returns the number of bytes actually written. */
-	return len;
+	write_bar_via_cfg(console_access_cap,
+			  console_cfg_offset
+			  + offsetof(struct virtio_console_config, emerg_wr),
+			  buf[0]);
+	return 1;
 }
 
 /*
@@ -1400,14 +1534,6 @@ __init void lguest_init(void)
 	atomic_notifier_chain_register(&panic_notifier_list, &paniced);
 
 	/*
-	 * The IDE code spends about 3 seconds probing for disks: if we reserve
-	 * all the I/O ports up front it can't get them and so doesn't probe.
-	 * Other device drivers are similar (but less severe).  This cuts the
-	 * kernel boot time on my machine from 4.1 seconds to 0.45 seconds.
-	 */
-	paravirt_disable_iospace();
-
-	/*
 	 * This is messy CPU setup stuff which the native boot code does before
 	 * start_kernel, so we have to do, too:
 	 */
@@ -1436,6 +1562,13 @@ __init void lguest_init(void)
 	/* Register our very early console. */
 	virtio_cons_early_init(early_put_chars);
 
+	/* Don't let ACPI try to control our PCI interrupts. */
+	disable_acpi();
+
+	/* We control them ourselves, by overriding these two hooks. */
+	pcibios_enable_irq = lguest_enable_irq;
+	pcibios_disable_irq = lguest_disable_irq;
+
 	/*
 	 * Last of all, we set the power management poweroff hook to point to
 	 * the Guest routine to power off, and the reboot hook to our restart
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 553c094b9cd7..a110efca6d06 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -238,6 +238,31 @@ static void __init_refok adjust_range_page_size_mask(struct map_range *mr,
 	}
 }
 
+static const char *page_size_string(struct map_range *mr)
+{
+	static const char str_1g[] = "1G";
+	static const char str_2m[] = "2M";
+	static const char str_4m[] = "4M";
+	static const char str_4k[] = "4k";
+
+	if (mr->page_size_mask & (1<<PG_LEVEL_1G))
+		return str_1g;
+	/*
+	 * 32-bit without PAE has a 4M large page size.
+	 * PG_LEVEL_2M is misnamed, but we can at least
+	 * print out the right size in the string.
+	 */
+	if (IS_ENABLED(CONFIG_X86_32) &&
+	    !IS_ENABLED(CONFIG_X86_PAE) &&
+	    mr->page_size_mask & (1<<PG_LEVEL_2M))
+		return str_4m;
+
+	if (mr->page_size_mask & (1<<PG_LEVEL_2M))
+		return str_2m;
+
+	return str_4k;
+}
+
 static int __meminit split_mem_range(struct map_range *mr, int nr_range,
 				     unsigned long start,
 				     unsigned long end)
@@ -333,8 +358,7 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range,
 	for (i = 0; i < nr_range; i++)
 		printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n",
 				mr[i].start, mr[i].end - 1,
-			(mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
-			 (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
+				page_size_string(&mr[i]));
 
 	return nr_range;
 }
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 919b91205cd4..df4552bd239e 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -35,12 +35,12 @@ struct va_alignment __read_mostly va_align = {
 	.flags = -1,
 };
 
-static unsigned int stack_maxrandom_size(void)
+static unsigned long stack_maxrandom_size(void)
 {
-	unsigned int max = 0;
+	unsigned long max = 0;
 	if ((current->flags & PF_RANDOMIZE) &&
 		!(current->personality & ADDR_NO_RANDOMIZE)) {
-		max = ((-1U) & STACK_RND_MASK) << PAGE_SHIFT;
+		max = ((-1UL) & STACK_RND_MASK) << PAGE_SHIFT;
 	}
 
 	return max;
diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile
index 85afde1fa3e5..a62e0be3a2f1 100644
--- a/arch/x86/platform/Makefile
+++ b/arch/x86/platform/Makefile
@@ -5,6 +5,7 @@ obj-y	+= geode/
 obj-y	+= goldfish/
 obj-y	+= iris/
 obj-y	+= intel-mid/
+obj-y	+= intel-quark/
 obj-y	+= olpc/
 obj-y	+= scx200/
 obj-y	+= sfi/
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S
index 5fcda7272550..86d0f9e08dd9 100644
--- a/arch/x86/platform/efi/efi_stub_64.S
+++ b/arch/x86/platform/efi/efi_stub_64.S
@@ -91,167 +91,6 @@ ENTRY(efi_call)
 	ret
 ENDPROC(efi_call)
 
-#ifdef CONFIG_EFI_MIXED
-
-/*
- * We run this function from the 1:1 mapping.
- *
- * This function must be invoked with a 1:1 mapped stack.
- */
-ENTRY(__efi64_thunk)
-	movl	%ds, %eax
-	push	%rax
-	movl	%es, %eax
-	push	%rax
-	movl	%ss, %eax
-	push	%rax
-
-	subq	$32, %rsp
-	movl	%esi, 0x0(%rsp)
-	movl	%edx, 0x4(%rsp)
-	movl	%ecx, 0x8(%rsp)
-	movq	%r8, %rsi
-	movl	%esi, 0xc(%rsp)
-	movq	%r9, %rsi
-	movl	%esi,  0x10(%rsp)
-
-	sgdt	save_gdt(%rip)
-
-	leaq	1f(%rip), %rbx
-	movq	%rbx, func_rt_ptr(%rip)
-
-	/* Switch to gdt with 32-bit segments */
-	movl	64(%rsp), %eax
-	lgdt	(%rax)
-
-	leaq	efi_enter32(%rip), %rax
-	pushq	$__KERNEL_CS
-	pushq	%rax
-	lretq
-
-1:	addq	$32, %rsp
-
-	lgdt	save_gdt(%rip)
-
-	pop	%rbx
-	movl	%ebx, %ss
-	pop	%rbx
-	movl	%ebx, %es
-	pop	%rbx
-	movl	%ebx, %ds
-
-	/*
-	 * Convert 32-bit status code into 64-bit.
-	 */
-	test	%rax, %rax
-	jz	1f
-	movl	%eax, %ecx
-	andl	$0x0fffffff, %ecx
-	andl	$0xf0000000, %eax
-	shl	$32, %rax
-	or	%rcx, %rax
-1:
-	ret
-ENDPROC(__efi64_thunk)
-
-ENTRY(efi_exit32)
-	movq	func_rt_ptr(%rip), %rax
-	push	%rax
-	mov	%rdi, %rax
-	ret
-ENDPROC(efi_exit32)
-
-	.code32
-/*
- * EFI service pointer must be in %edi.
- *
- * The stack should represent the 32-bit calling convention.
- */
-ENTRY(efi_enter32)
-	movl	$__KERNEL_DS, %eax
-	movl	%eax, %ds
-	movl	%eax, %es
-	movl	%eax, %ss
-
-	/* Reload pgtables */
-	movl	%cr3, %eax
-	movl	%eax, %cr3
-
-	/* Disable paging */
-	movl	%cr0, %eax
-	btrl	$X86_CR0_PG_BIT, %eax
-	movl	%eax, %cr0
-
-	/* Disable long mode via EFER */
-	movl	$MSR_EFER, %ecx
-	rdmsr
-	btrl	$_EFER_LME, %eax
-	wrmsr
-
-	call	*%edi
-
-	/* We must preserve return value */
-	movl	%eax, %edi
-
-	/*
-	 * Some firmware will return with interrupts enabled. Be sure to
-	 * disable them before we switch GDTs.
-	 */
-	cli
-
-	movl	68(%esp), %eax
-	movl	%eax, 2(%eax)
-	lgdtl	(%eax)
-
-	movl	%cr4, %eax
-	btsl	$(X86_CR4_PAE_BIT), %eax
-	movl	%eax, %cr4
-
-	movl	%cr3, %eax
-	movl	%eax, %cr3
-
-	movl	$MSR_EFER, %ecx
-	rdmsr
-	btsl	$_EFER_LME, %eax
-	wrmsr
-
-	xorl	%eax, %eax
-	lldt	%ax
-
-	movl	72(%esp), %eax
-	pushl	$__KERNEL_CS
-	pushl	%eax
-
-	/* Enable paging */
-	movl	%cr0, %eax
-	btsl	$X86_CR0_PG_BIT, %eax
-	movl	%eax, %cr0
-	lret
-ENDPROC(efi_enter32)
-
-	.data
-	.balign	8
-	.global	efi32_boot_gdt
-efi32_boot_gdt:	.word	0
-		.quad	0
-
-save_gdt:	.word	0
-		.quad	0
-func_rt_ptr:	.quad	0
-
-	.global efi_gdt64
-efi_gdt64:
-	.word	efi_gdt64_end - efi_gdt64
-	.long	0			/* Filled out by user */
-	.word	0
-	.quad	0x0000000000000000	/* NULL descriptor */
-	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
-	.quad	0x00cf92000000ffff	/* __KERNEL_DS */
-	.quad	0x0080890000000000	/* TS descriptor */
-	.quad   0x0000000000000000	/* TS continued */
-efi_gdt64_end:
-#endif /* CONFIG_EFI_MIXED */
-
 	.data
 ENTRY(efi_scratch)
 	.fill 3,8,0
diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S
index 8806fa73e6e6..ff85d28c50f2 100644
--- a/arch/x86/platform/efi/efi_thunk_64.S
+++ b/arch/x86/platform/efi/efi_thunk_64.S
@@ -1,9 +1,26 @@
 /*
  * Copyright (C) 2014 Intel Corporation; author Matt Fleming
+ *
+ * Support for invoking 32-bit EFI runtime services from a 64-bit
+ * kernel.
+ *
+ * The below thunking functions are only used after ExitBootServices()
+ * has been called. This simplifies things considerably as compared with
+ * the early EFI thunking because we can leave all the kernel state
+ * intact (GDT, IDT, etc) and simply invoke the the 32-bit EFI runtime
+ * services from __KERNEL32_CS. This means we can continue to service
+ * interrupts across an EFI mixed mode call.
+ *
+ * We do however, need to handle the fact that we're running in a full
+ * 64-bit virtual address space. Things like the stack and instruction
+ * addresses need to be accessible by the 32-bit firmware, so we rely on
+ * using the identity mappings in the EFI page table to access the stack
+ * and kernel text (see efi_setup_page_tables()).
  */
 
 #include <linux/linkage.h>
 #include <asm/page_types.h>
+#include <asm/segment.h>
 
 	.text
 	.code64
@@ -33,14 +50,6 @@ ENTRY(efi64_thunk)
 	leaq	efi_exit32(%rip), %rbx
 	subq	%rax, %rbx
 	movl	%ebx, 8(%rsp)
-	leaq	efi_gdt64(%rip), %rbx
-	subq	%rax, %rbx
-	movl	%ebx, 2(%ebx)
-	movl	%ebx, 4(%rsp)
-	leaq	efi_gdt32(%rip), %rbx
-	subq	%rax, %rbx
-	movl	%ebx, 2(%ebx)
-	movl	%ebx, (%rsp)
 
 	leaq	__efi64_thunk(%rip), %rbx
 	subq	%rax, %rbx
@@ -52,14 +61,92 @@ ENTRY(efi64_thunk)
 	retq
 ENDPROC(efi64_thunk)
 
-	.data
-efi_gdt32:
-	.word 	efi_gdt32_end - efi_gdt32
-	.long	0			/* Filled out above */
-	.word	0
-	.quad	0x0000000000000000	/* NULL descriptor */
-	.quad	0x00cf9a000000ffff	/* __KERNEL_CS */
-	.quad	0x00cf93000000ffff	/* __KERNEL_DS */
-efi_gdt32_end:
+/*
+ * We run this function from the 1:1 mapping.
+ *
+ * This function must be invoked with a 1:1 mapped stack.
+ */
+ENTRY(__efi64_thunk)
+	movl	%ds, %eax
+	push	%rax
+	movl	%es, %eax
+	push	%rax
+	movl	%ss, %eax
+	push	%rax
+
+	subq	$32, %rsp
+	movl	%esi, 0x0(%rsp)
+	movl	%edx, 0x4(%rsp)
+	movl	%ecx, 0x8(%rsp)
+	movq	%r8, %rsi
+	movl	%esi, 0xc(%rsp)
+	movq	%r9, %rsi
+	movl	%esi,  0x10(%rsp)
+
+	leaq	1f(%rip), %rbx
+	movq	%rbx, func_rt_ptr(%rip)
+
+	/* Switch to 32-bit descriptor */
+	pushq	$__KERNEL32_CS
+	leaq	efi_enter32(%rip), %rax
+	pushq	%rax
+	lretq
+
+1:	addq	$32, %rsp
+
+	pop	%rbx
+	movl	%ebx, %ss
+	pop	%rbx
+	movl	%ebx, %es
+	pop	%rbx
+	movl	%ebx, %ds
 
+	/*
+	 * Convert 32-bit status code into 64-bit.
+	 */
+	test	%rax, %rax
+	jz	1f
+	movl	%eax, %ecx
+	andl	$0x0fffffff, %ecx
+	andl	$0xf0000000, %eax
+	shl	$32, %rax
+	or	%rcx, %rax
+1:
+	ret
+ENDPROC(__efi64_thunk)
+
+ENTRY(efi_exit32)
+	movq	func_rt_ptr(%rip), %rax
+	push	%rax
+	mov	%rdi, %rax
+	ret
+ENDPROC(efi_exit32)
+
+	.code32
+/*
+ * EFI service pointer must be in %edi.
+ *
+ * The stack should represent the 32-bit calling convention.
+ */
+ENTRY(efi_enter32)
+	movl	$__KERNEL_DS, %eax
+	movl	%eax, %ds
+	movl	%eax, %es
+	movl	%eax, %ss
+
+	call	*%edi
+
+	/* We must preserve return value */
+	movl	%eax, %edi
+
+	movl	72(%esp), %eax
+	pushl	$__KERNEL_CS
+	pushl	%eax
+
+	lret
+ENDPROC(efi_enter32)
+
+	.data
+	.balign	8
+func_rt_ptr:		.quad 0
 efi_saved_sp:		.quad 0
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
index 1bbedc4b0f88..3005f0c89f2e 100644
--- a/arch/x86/platform/intel-mid/intel-mid.c
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -130,7 +130,7 @@ static void intel_mid_arch_setup(void)
 		intel_mid_ops = get_intel_mid_ops[__intel_mid_cpu_chip]();
 	else {
 		intel_mid_ops = get_intel_mid_ops[INTEL_MID_CPU_CHIP_PENWELL]();
-		pr_info("ARCH: Uknown SoC, assuming PENWELL!\n");
+		pr_info("ARCH: Unknown SoC, assuming PENWELL!\n");
 	}
 
 out:
diff --git a/arch/x86/platform/intel-quark/Makefile b/arch/x86/platform/intel-quark/Makefile
new file mode 100644
index 000000000000..9cc57ed36022
--- /dev/null
+++ b/arch/x86/platform/intel-quark/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_INTEL_IMR) += imr.o
+obj-$(CONFIG_DEBUG_IMR_SELFTEST) += imr_selftest.o
diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c
new file mode 100644
index 000000000000..0ee619f9fcb7
--- /dev/null
+++ b/arch/x86/platform/intel-quark/imr.c
@@ -0,0 +1,661 @@
+/**
+ * imr.c
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie>
+ *
+ * IMR registers define an isolated region of memory that can
+ * be masked to prohibit certain system agents from accessing memory.
+ * When a device behind a masked port performs an access - snooped or
+ * not, an IMR may optionally prevent that transaction from changing
+ * the state of memory or from getting correct data in response to the
+ * operation.
+ *
+ * Write data will be dropped and reads will return 0xFFFFFFFF, the
+ * system will reset and system BIOS will print out an error message to
+ * inform the user that an IMR has been violated.
+ *
+ * This code is based on the Linux MTRR code and reference code from
+ * Intel's Quark BSP EFI, Linux and grub code.
+ *
+ * See quark-x1000-datasheet.pdf for register definitions.
+ * http://www.intel.com/content/dam/www/public/us/en/documents/datasheets/quark-x1000-datasheet.pdf
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <asm-generic/sections.h>
+#include <asm/cpu_device_id.h>
+#include <asm/imr.h>
+#include <asm/iosf_mbi.h>
+#include <linux/debugfs.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+struct imr_device {
+	struct dentry	*file;
+	bool		init;
+	struct mutex	lock;
+	int		max_imr;
+	int		reg_base;
+};
+
+static struct imr_device imr_dev;
+
+/*
+ * IMR read/write mask control registers.
+ * See quark-x1000-datasheet.pdf sections 12.7.4.5 and 12.7.4.6 for
+ * bit definitions.
+ *
+ * addr_hi
+ * 31		Lock bit
+ * 30:24	Reserved
+ * 23:2		1 KiB aligned lo address
+ * 1:0		Reserved
+ *
+ * addr_hi
+ * 31:24	Reserved
+ * 23:2		1 KiB aligned hi address
+ * 1:0		Reserved
+ */
+#define IMR_LOCK	BIT(31)
+
+struct imr_regs {
+	u32 addr_lo;
+	u32 addr_hi;
+	u32 rmask;
+	u32 wmask;
+};
+
+#define IMR_NUM_REGS	(sizeof(struct imr_regs)/sizeof(u32))
+#define IMR_SHIFT	8
+#define imr_to_phys(x)	((x) << IMR_SHIFT)
+#define phys_to_imr(x)	((x) >> IMR_SHIFT)
+
+/**
+ * imr_is_enabled - true if an IMR is enabled false otherwise.
+ *
+ * Determines if an IMR is enabled based on address range and read/write
+ * mask. An IMR set with an address range set to zero and a read/write
+ * access mask set to all is considered to be disabled. An IMR in any
+ * other state - for example set to zero but without read/write access
+ * all is considered to be enabled. This definition of disabled is how
+ * firmware switches off an IMR and is maintained in kernel for
+ * consistency.
+ *
+ * @imr:	pointer to IMR descriptor.
+ * @return:	true if IMR enabled false if disabled.
+ */
+static inline int imr_is_enabled(struct imr_regs *imr)
+{
+	return !(imr->rmask == IMR_READ_ACCESS_ALL &&
+		 imr->wmask == IMR_WRITE_ACCESS_ALL &&
+		 imr_to_phys(imr->addr_lo) == 0 &&
+		 imr_to_phys(imr->addr_hi) == 0);
+}
+
+/**
+ * imr_read - read an IMR at a given index.
+ *
+ * Requires caller to hold imr mutex.
+ *
+ * @idev:	pointer to imr_device structure.
+ * @imr_id:	IMR entry to read.
+ * @imr:	IMR structure representing address and access masks.
+ * @return:	0 on success or error code passed from mbi_iosf on failure.
+ */
+static int imr_read(struct imr_device *idev, u32 imr_id, struct imr_regs *imr)
+{
+	u32 reg = imr_id * IMR_NUM_REGS + idev->reg_base;
+	int ret;
+
+	ret = iosf_mbi_read(QRK_MBI_UNIT_MM, QRK_MBI_MM_READ,
+				reg++, &imr->addr_lo);
+	if (ret)
+		return ret;
+
+	ret = iosf_mbi_read(QRK_MBI_UNIT_MM, QRK_MBI_MM_READ,
+				reg++, &imr->addr_hi);
+	if (ret)
+		return ret;
+
+	ret = iosf_mbi_read(QRK_MBI_UNIT_MM, QRK_MBI_MM_READ,
+				reg++, &imr->rmask);
+	if (ret)
+		return ret;
+
+	return iosf_mbi_read(QRK_MBI_UNIT_MM, QRK_MBI_MM_READ,
+				reg++, &imr->wmask);
+}
+
+/**
+ * imr_write - write an IMR at a given index.
+ *
+ * Requires caller to hold imr mutex.
+ * Note lock bits need to be written independently of address bits.
+ *
+ * @idev:	pointer to imr_device structure.
+ * @imr_id:	IMR entry to write.
+ * @imr:	IMR structure representing address and access masks.
+ * @lock:	indicates if the IMR lock bit should be applied.
+ * @return:	0 on success or error code passed from mbi_iosf on failure.
+ */
+static int imr_write(struct imr_device *idev, u32 imr_id,
+		     struct imr_regs *imr, bool lock)
+{
+	unsigned long flags;
+	u32 reg = imr_id * IMR_NUM_REGS + idev->reg_base;
+	int ret;
+
+	local_irq_save(flags);
+
+	ret = iosf_mbi_write(QRK_MBI_UNIT_MM, QRK_MBI_MM_WRITE, reg++,
+				imr->addr_lo);
+	if (ret)
+		goto failed;
+
+	ret = iosf_mbi_write(QRK_MBI_UNIT_MM, QRK_MBI_MM_WRITE,
+				reg++, imr->addr_hi);
+	if (ret)
+		goto failed;
+
+	ret = iosf_mbi_write(QRK_MBI_UNIT_MM, QRK_MBI_MM_WRITE,
+				reg++, imr->rmask);
+	if (ret)
+		goto failed;
+
+	ret = iosf_mbi_write(QRK_MBI_UNIT_MM, QRK_MBI_MM_WRITE,
+				reg++, imr->wmask);
+	if (ret)
+		goto failed;
+
+	/* Lock bit must be set separately to addr_lo address bits. */
+	if (lock) {
+		imr->addr_lo |= IMR_LOCK;
+		ret = iosf_mbi_write(QRK_MBI_UNIT_MM, QRK_MBI_MM_WRITE,
+					reg - IMR_NUM_REGS, imr->addr_lo);
+		if (ret)
+			goto failed;
+	}
+
+	local_irq_restore(flags);
+	return 0;
+failed:
+	/*
+	 * If writing to the IOSF failed then we're in an unknown state,
+	 * likely a very bad state. An IMR in an invalid state will almost
+	 * certainly lead to a memory access violation.
+	 */
+	local_irq_restore(flags);
+	WARN(ret, "IOSF-MBI write fail range 0x%08x-0x%08x unreliable\n",
+	     imr_to_phys(imr->addr_lo), imr_to_phys(imr->addr_hi) + IMR_MASK);
+
+	return ret;
+}
+
+/**
+ * imr_dbgfs_state_show - print state of IMR registers.
+ *
+ * @s:		pointer to seq_file for output.
+ * @unused:	unused parameter.
+ * @return:	0 on success or error code passed from mbi_iosf on failure.
+ */
+static int imr_dbgfs_state_show(struct seq_file *s, void *unused)
+{
+	phys_addr_t base;
+	phys_addr_t end;
+	int i;
+	struct imr_device *idev = s->private;
+	struct imr_regs imr;
+	size_t size;
+	int ret = -ENODEV;
+
+	mutex_lock(&idev->lock);
+
+	for (i = 0; i < idev->max_imr; i++) {
+
+		ret = imr_read(idev, i, &imr);
+		if (ret)
+			break;
+
+		/*
+		 * Remember to add IMR_ALIGN bytes to size to indicate the
+		 * inherent IMR_ALIGN size bytes contained in the masked away
+		 * lower ten bits.
+		 */
+		if (imr_is_enabled(&imr)) {
+			base = imr_to_phys(imr.addr_lo);
+			end = imr_to_phys(imr.addr_hi) + IMR_MASK;
+		} else {
+			base = 0;
+			end = 0;
+		}
+		size = end - base;
+		seq_printf(s, "imr%02i: base=%pa, end=%pa, size=0x%08zx "
+			   "rmask=0x%08x, wmask=0x%08x, %s, %s\n", i,
+			   &base, &end, size, imr.rmask, imr.wmask,
+			   imr_is_enabled(&imr) ? "enabled " : "disabled",
+			   imr.addr_lo & IMR_LOCK ? "locked" : "unlocked");
+	}
+
+	mutex_unlock(&idev->lock);
+	return ret;
+}
+
+/**
+ * imr_state_open - debugfs open callback.
+ *
+ * @inode:	pointer to struct inode.
+ * @file:	pointer to struct file.
+ * @return:	result of single open.
+ */
+static int imr_state_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, imr_dbgfs_state_show, inode->i_private);
+}
+
+static const struct file_operations imr_state_ops = {
+	.open		= imr_state_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/**
+ * imr_debugfs_register - register debugfs hooks.
+ *
+ * @idev:	pointer to imr_device structure.
+ * @return:	0 on success - errno on failure.
+ */
+static int imr_debugfs_register(struct imr_device *idev)
+{
+	idev->file = debugfs_create_file("imr_state", S_IFREG | S_IRUGO, NULL,
+					 idev, &imr_state_ops);
+	return PTR_ERR_OR_ZERO(idev->file);
+}
+
+/**
+ * imr_debugfs_unregister - unregister debugfs hooks.
+ *
+ * @idev:	pointer to imr_device structure.
+ * @return:
+ */
+static void imr_debugfs_unregister(struct imr_device *idev)
+{
+	debugfs_remove(idev->file);
+}
+
+/**
+ * imr_check_params - check passed address range IMR alignment and non-zero size
+ *
+ * @base:	base address of intended IMR.
+ * @size:	size of intended IMR.
+ * @return:	zero on valid range -EINVAL on unaligned base/size.
+ */
+static int imr_check_params(phys_addr_t base, size_t size)
+{
+	if ((base & IMR_MASK) || (size & IMR_MASK)) {
+		pr_err("base %pa size 0x%08zx must align to 1KiB\n",
+			&base, size);
+		return -EINVAL;
+	}
+	if (size == 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+/**
+ * imr_raw_size - account for the IMR_ALIGN bytes that addr_hi appends.
+ *
+ * IMR addr_hi has a built in offset of plus IMR_ALIGN (0x400) bytes from the
+ * value in the register. We need to subtract IMR_ALIGN bytes from input sizes
+ * as a result.
+ *
+ * @size:	input size bytes.
+ * @return:	reduced size.
+ */
+static inline size_t imr_raw_size(size_t size)
+{
+	return size - IMR_ALIGN;
+}
+
+/**
+ * imr_address_overlap - detects an address overlap.
+ *
+ * @addr:	address to check against an existing IMR.
+ * @imr:	imr being checked.
+ * @return:	true for overlap false for no overlap.
+ */
+static inline int imr_address_overlap(phys_addr_t addr, struct imr_regs *imr)
+{
+	return addr >= imr_to_phys(imr->addr_lo) && addr <= imr_to_phys(imr->addr_hi);
+}
+
+/**
+ * imr_add_range - add an Isolated Memory Region.
+ *
+ * @base:	physical base address of region aligned to 1KiB.
+ * @size:	physical size of region in bytes must be aligned to 1KiB.
+ * @read_mask:	read access mask.
+ * @write_mask:	write access mask.
+ * @lock:	indicates whether or not to permanently lock this region.
+ * @return:	zero on success or negative value indicating error.
+ */
+int imr_add_range(phys_addr_t base, size_t size,
+		  unsigned int rmask, unsigned int wmask, bool lock)
+{
+	phys_addr_t end;
+	unsigned int i;
+	struct imr_device *idev = &imr_dev;
+	struct imr_regs imr;
+	size_t raw_size;
+	int reg;
+	int ret;
+
+	if (WARN_ONCE(idev->init == false, "driver not initialized"))
+		return -ENODEV;
+
+	ret = imr_check_params(base, size);
+	if (ret)
+		return ret;
+
+	/* Tweak the size value. */
+	raw_size = imr_raw_size(size);
+	end = base + raw_size;
+
+	/*
+	 * Check for reserved IMR value common to firmware, kernel and grub
+	 * indicating a disabled IMR.
+	 */
+	imr.addr_lo = phys_to_imr(base);
+	imr.addr_hi = phys_to_imr(end);
+	imr.rmask = rmask;
+	imr.wmask = wmask;
+	if (!imr_is_enabled(&imr))
+		return -ENOTSUPP;
+
+	mutex_lock(&idev->lock);
+
+	/*
+	 * Find a free IMR while checking for an existing overlapping range.
+	 * Note there's no restriction in silicon to prevent IMR overlaps.
+	 * For the sake of simplicity and ease in defining/debugging an IMR
+	 * memory map we exclude IMR overlaps.
+	 */
+	reg = -1;
+	for (i = 0; i < idev->max_imr; i++) {
+		ret = imr_read(idev, i, &imr);
+		if (ret)
+			goto failed;
+
+		/* Find overlap @ base or end of requested range. */
+		ret = -EINVAL;
+		if (imr_is_enabled(&imr)) {
+			if (imr_address_overlap(base, &imr))
+				goto failed;
+			if (imr_address_overlap(end, &imr))
+				goto failed;
+		} else {
+			reg = i;
+		}
+	}
+
+	/* Error out if we have no free IMR entries. */
+	if (reg == -1) {
+		ret = -ENOMEM;
+		goto failed;
+	}
+
+	pr_debug("add %d phys %pa-%pa size %zx mask 0x%08x wmask 0x%08x\n",
+		 reg, &base, &end, raw_size, rmask, wmask);
+
+	/* Enable IMR at specified range and access mask. */
+	imr.addr_lo = phys_to_imr(base);
+	imr.addr_hi = phys_to_imr(end);
+	imr.rmask = rmask;
+	imr.wmask = wmask;
+
+	ret = imr_write(idev, reg, &imr, lock);
+	if (ret < 0) {
+		/*
+		 * In the highly unlikely event iosf_mbi_write failed
+		 * attempt to rollback the IMR setup skipping the trapping
+		 * of further IOSF write failures.
+		 */
+		imr.addr_lo = 0;
+		imr.addr_hi = 0;
+		imr.rmask = IMR_READ_ACCESS_ALL;
+		imr.wmask = IMR_WRITE_ACCESS_ALL;
+		imr_write(idev, reg, &imr, false);
+	}
+failed:
+	mutex_unlock(&idev->lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(imr_add_range);
+
+/**
+ * __imr_remove_range - delete an Isolated Memory Region.
+ *
+ * This function allows you to delete an IMR by its index specified by reg or
+ * by address range specified by base and size respectively. If you specify an
+ * index on its own the base and size parameters are ignored.
+ * imr_remove_range(0, base, size); delete IMR at index 0 base/size ignored.
+ * imr_remove_range(-1, base, size); delete IMR from base to base+size.
+ *
+ * @reg:	imr index to remove.
+ * @base:	physical base address of region aligned to 1 KiB.
+ * @size:	physical size of region in bytes aligned to 1 KiB.
+ * @return:	-EINVAL on invalid range or out or range id
+ *		-ENODEV if reg is valid but no IMR exists or is locked
+ *		0 on success.
+ */
+static int __imr_remove_range(int reg, phys_addr_t base, size_t size)
+{
+	phys_addr_t end;
+	bool found = false;
+	unsigned int i;
+	struct imr_device *idev = &imr_dev;
+	struct imr_regs imr;
+	size_t raw_size;
+	int ret = 0;
+
+	if (WARN_ONCE(idev->init == false, "driver not initialized"))
+		return -ENODEV;
+
+	/*
+	 * Validate address range if deleting by address, else we are
+	 * deleting by index where base and size will be ignored.
+	 */
+	if (reg == -1) {
+		ret = imr_check_params(base, size);
+		if (ret)
+			return ret;
+	}
+
+	/* Tweak the size value. */
+	raw_size = imr_raw_size(size);
+	end = base + raw_size;
+
+	mutex_lock(&idev->lock);
+
+	if (reg >= 0) {
+		/* If a specific IMR is given try to use it. */
+		ret = imr_read(idev, reg, &imr);
+		if (ret)
+			goto failed;
+
+		if (!imr_is_enabled(&imr) || imr.addr_lo & IMR_LOCK) {
+			ret = -ENODEV;
+			goto failed;
+		}
+		found = true;
+	} else {
+		/* Search for match based on address range. */
+		for (i = 0; i < idev->max_imr; i++) {
+			ret = imr_read(idev, i, &imr);
+			if (ret)
+				goto failed;
+
+			if (!imr_is_enabled(&imr) || imr.addr_lo & IMR_LOCK)
+				continue;
+
+			if ((imr_to_phys(imr.addr_lo) == base) &&
+			    (imr_to_phys(imr.addr_hi) == end)) {
+				found = true;
+				reg = i;
+				break;
+			}
+		}
+	}
+
+	if (!found) {
+		ret = -ENODEV;
+		goto failed;
+	}
+
+	pr_debug("remove %d phys %pa-%pa size %zx\n", reg, &base, &end, raw_size);
+
+	/* Tear down the IMR. */
+	imr.addr_lo = 0;
+	imr.addr_hi = 0;
+	imr.rmask = IMR_READ_ACCESS_ALL;
+	imr.wmask = IMR_WRITE_ACCESS_ALL;
+
+	ret = imr_write(idev, reg, &imr, false);
+
+failed:
+	mutex_unlock(&idev->lock);
+	return ret;
+}
+
+/**
+ * imr_remove_range - delete an Isolated Memory Region by address
+ *
+ * This function allows you to delete an IMR by an address range specified
+ * by base and size respectively.
+ * imr_remove_range(base, size); delete IMR from base to base+size.
+ *
+ * @base:	physical base address of region aligned to 1 KiB.
+ * @size:	physical size of region in bytes aligned to 1 KiB.
+ * @return:	-EINVAL on invalid range or out or range id
+ *		-ENODEV if reg is valid but no IMR exists or is locked
+ *		0 on success.
+ */
+int imr_remove_range(phys_addr_t base, size_t size)
+{
+	return __imr_remove_range(-1, base, size);
+}
+EXPORT_SYMBOL_GPL(imr_remove_range);
+
+/**
+ * imr_clear - delete an Isolated Memory Region by index
+ *
+ * This function allows you to delete an IMR by an address range specified
+ * by the index of the IMR. Useful for initial sanitization of the IMR
+ * address map.
+ * imr_ge(base, size); delete IMR from base to base+size.
+ *
+ * @reg:	imr index to remove.
+ * @return:	-EINVAL on invalid range or out or range id
+ *		-ENODEV if reg is valid but no IMR exists or is locked
+ *		0 on success.
+ */
+static inline int imr_clear(int reg)
+{
+	return __imr_remove_range(reg, 0, 0);
+}
+
+/**
+ * imr_fixup_memmap - Tear down IMRs used during bootup.
+ *
+ * BIOS and Grub both setup IMRs around compressed kernel, initrd memory
+ * that need to be removed before the kernel hands out one of the IMR
+ * encased addresses to a downstream DMA agent such as the SD or Ethernet.
+ * IMRs on Galileo are setup to immediately reset the system on violation.
+ * As a result if you're running a root filesystem from SD - you'll need
+ * the boot-time IMRs torn down or you'll find seemingly random resets when
+ * using your filesystem.
+ *
+ * @idev:	pointer to imr_device structure.
+ * @return:
+ */
+static void __init imr_fixup_memmap(struct imr_device *idev)
+{
+	phys_addr_t base = virt_to_phys(&_text);
+	size_t size = virt_to_phys(&__end_rodata) - base;
+	int i;
+	int ret;
+
+	/* Tear down all existing unlocked IMRs. */
+	for (i = 0; i < idev->max_imr; i++)
+		imr_clear(i);
+
+	/*
+	 * Setup a locked IMR around the physical extent of the kernel
+	 * from the beginning of the .text secton to the end of the
+	 * .rodata section as one physically contiguous block.
+	 */
+	ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, true);
+	if (ret < 0) {
+		pr_err("unable to setup IMR for kernel: (%p - %p)\n",
+			&_text, &__end_rodata);
+	} else {
+		pr_info("protecting kernel .text - .rodata: %zu KiB (%p - %p)\n",
+			size / 1024, &_text, &__end_rodata);
+	}
+
+}
+
+static const struct x86_cpu_id imr_ids[] __initconst = {
+	{ X86_VENDOR_INTEL, 5, 9 },	/* Intel Quark SoC X1000. */
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, imr_ids);
+
+/**
+ * imr_init - entry point for IMR driver.
+ *
+ * return: -ENODEV for no IMR support 0 if good to go.
+ */
+static int __init imr_init(void)
+{
+	struct imr_device *idev = &imr_dev;
+	int ret;
+
+	if (!x86_match_cpu(imr_ids) || !iosf_mbi_available())
+		return -ENODEV;
+
+	idev->max_imr = QUARK_X1000_IMR_MAX;
+	idev->reg_base = QUARK_X1000_IMR_REGBASE;
+	idev->init = true;
+
+	mutex_init(&idev->lock);
+	ret = imr_debugfs_register(idev);
+	if (ret != 0)
+		pr_warn("debugfs register failed!\n");
+	imr_fixup_memmap(idev);
+	return 0;
+}
+
+/**
+ * imr_exit - exit point for IMR code.
+ *
+ * Deregisters debugfs, leave IMR state as-is.
+ *
+ * return:
+ */
+static void __exit imr_exit(void)
+{
+	imr_debugfs_unregister(&imr_dev);
+}
+
+module_init(imr_init);
+module_exit(imr_exit);
+
+MODULE_AUTHOR("Bryan O'Donoghue <pure.logic@nexus-software.ie>");
+MODULE_DESCRIPTION("Intel Isolated Memory Region driver");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/arch/x86/platform/intel-quark/imr_selftest.c b/arch/x86/platform/intel-quark/imr_selftest.c
new file mode 100644
index 000000000000..c9a0838890e2
--- /dev/null
+++ b/arch/x86/platform/intel-quark/imr_selftest.c
@@ -0,0 +1,129 @@
+/**
+ * imr_selftest.c
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie>
+ *
+ * IMR self test. The purpose of this module is to run a set of tests on the
+ * IMR API to validate it's sanity. We check for overlapping, reserved
+ * addresses and setup/teardown sanity.
+ *
+ */
+
+#include <asm-generic/sections.h>
+#include <asm/imr.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+#define SELFTEST KBUILD_MODNAME ": "
+/**
+ * imr_self_test_result - Print result string for self test.
+ *
+ * @res:	result code - true if test passed false otherwise.
+ * @fmt:	format string.
+ * ...		variadic argument list.
+ */
+static void __init imr_self_test_result(int res, const char *fmt, ...)
+{
+	va_list vlist;
+
+	/* Print pass/fail. */
+	if (res)
+		pr_info(SELFTEST "pass ");
+	else
+		pr_info(SELFTEST "fail ");
+
+	/* Print variable string. */
+	va_start(vlist, fmt);
+	vprintk(fmt, vlist);
+	va_end(vlist);
+
+	/* Optional warning. */
+	WARN(res == 0, "test failed");
+}
+#undef SELFTEST
+
+/**
+ * imr_self_test
+ *
+ * Verify IMR self_test with some simple tests to verify overlap,
+ * zero sized allocations and 1 KiB sized areas.
+ *
+ */
+static void __init imr_self_test(void)
+{
+	phys_addr_t base  = virt_to_phys(&_text);
+	size_t size = virt_to_phys(&__end_rodata) - base;
+	const char *fmt_over = "overlapped IMR @ (0x%08lx - 0x%08lx)\n";
+	int ret;
+
+	/* Test zero zero. */
+	ret = imr_add_range(0, 0, 0, 0, false);
+	imr_self_test_result(ret < 0, "zero sized IMR\n");
+
+	/* Test exact overlap. */
+	ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
+	imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size));
+
+	/* Test overlap with base inside of existing. */
+	base += size - IMR_ALIGN;
+	ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
+	imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size));
+
+	/* Test overlap with end inside of existing. */
+	base -= size + IMR_ALIGN * 2;
+	ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
+	imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size));
+
+	/* Test that a 1 KiB IMR @ zero with read/write all will bomb out. */
+	ret = imr_add_range(0, IMR_ALIGN, IMR_READ_ACCESS_ALL,
+			    IMR_WRITE_ACCESS_ALL, false);
+	imr_self_test_result(ret < 0, "1KiB IMR @ 0x00000000 - access-all\n");
+
+	/* Test that a 1 KiB IMR @ zero with CPU only will work. */
+	ret = imr_add_range(0, IMR_ALIGN, IMR_CPU, IMR_CPU, false);
+	imr_self_test_result(ret >= 0, "1KiB IMR @ 0x00000000 - cpu-access\n");
+	if (ret >= 0) {
+		ret = imr_remove_range(0, IMR_ALIGN);
+		imr_self_test_result(ret == 0, "teardown - cpu-access\n");
+	}
+
+	/* Test 2 KiB works. */
+	size = IMR_ALIGN * 2;
+	ret = imr_add_range(0, size, IMR_READ_ACCESS_ALL,
+			    IMR_WRITE_ACCESS_ALL, false);
+	imr_self_test_result(ret >= 0, "2KiB IMR @ 0x00000000\n");
+	if (ret >= 0) {
+		ret = imr_remove_range(0, size);
+		imr_self_test_result(ret == 0, "teardown 2KiB\n");
+	}
+}
+
+/**
+ * imr_self_test_init - entry point for IMR driver.
+ *
+ * return: -ENODEV for no IMR support 0 if good to go.
+ */
+static int __init imr_self_test_init(void)
+{
+	imr_self_test();
+	return 0;
+}
+
+/**
+ * imr_self_test_exit - exit point for IMR code.
+ *
+ * return:
+ */
+static void __exit imr_self_test_exit(void)
+{
+}
+
+module_init(imr_self_test_init);
+module_exit(imr_self_test_exit);
+
+MODULE_AUTHOR("Bryan O'Donoghue <pure.logic@nexus-software.ie>");
+MODULE_DESCRIPTION("Intel Isolated Memory Region self-test driver");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index bd8b8459c3d0..5240f563076d 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1070,6 +1070,23 @@ static inline void xen_write_cr8(unsigned long val)
 	BUG_ON(val);
 }
 #endif
+
+static u64 xen_read_msr_safe(unsigned int msr, int *err)
+{
+	u64 val;
+
+	val = native_read_msr_safe(msr, err);
+	switch (msr) {
+	case MSR_IA32_APICBASE:
+#ifdef CONFIG_X86_X2APIC
+		if (!(cpuid_ecx(1) & (1 << (X86_FEATURE_X2APIC & 31))))
+#endif
+			val &= ~X2APIC_ENABLE;
+		break;
+	}
+	return val;
+}
+
 static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
 {
 	int ret;
@@ -1240,7 +1257,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
 
 	.wbinvd = native_wbinvd,
 
-	.read_msr = native_read_msr_safe,
+	.read_msr = xen_read_msr_safe,
 	.write_msr = xen_write_msr_safe,
 
 	.read_tsc = native_read_tsc,
@@ -1741,6 +1758,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
 #ifdef CONFIG_X86_32
 	i386_start_kernel();
 #else
+	cr4_init_shadow(); /* 32b kernel does this in i386_start_kernel() */
 	x86_64_start_reservations((char *)__pa_symbol(&boot_params));
 #endif
 }
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 23b45eb9a89c..956374c1edbc 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -41,7 +41,7 @@ static u8 zero_stats;
 static inline void check_zero(void)
 {
 	u8 ret;
-	u8 old = ACCESS_ONCE(zero_stats);
+	u8 old = READ_ONCE(zero_stats);
 	if (unlikely(old)) {
 		ret = cmpxchg(&zero_stats, old, 0);
 		/* This ensures only one fellow resets the stat */
@@ -112,6 +112,7 @@ __visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
 	struct xen_lock_waiting *w = this_cpu_ptr(&lock_waiting);
 	int cpu = smp_processor_id();
 	u64 start;
+	__ticket_t head;
 	unsigned long flags;
 
 	/* If kicker interrupts not initialized yet, just spin */
@@ -159,11 +160,15 @@ __visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
 	 */
 	__ticket_enter_slowpath(lock);
 
+	/* make sure enter_slowpath, which is atomic does not cross the read */
+	smp_mb__after_atomic();
+
 	/*
 	 * check again make sure it didn't become free while
 	 * we weren't looking
 	 */
-	if (ACCESS_ONCE(lock->tickets.head) == want) {
+	head = READ_ONCE(lock->tickets.head);
+	if (__tickets_equal(head, want)) {
 		add_stats(TAKEN_SLOW_PICKUP, 1);
 		goto out;
 	}
@@ -204,8 +209,8 @@ static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
 		const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu);
 
 		/* Make sure we read lock before want */
-		if (ACCESS_ONCE(w->lock) == lock &&
-		    ACCESS_ONCE(w->want) == next) {
+		if (READ_ONCE(w->lock) == lock &&
+		    READ_ONCE(w->want) == next) {
 			add_stats(RELEASED_SLOW_KICKED, 1);
 			xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
 			break;