From 5f35eb0e29ca26da82febe49d7698dbeb8882ea0 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Thu, 1 May 2014 21:15:48 +0800 Subject: x86/efi: earlyprintk=efi,keep fix earlyprintk=efi,keep causes the kernel to hang while freeing initmem, like below: VFS: Mounted root (ext4 filesystem) readonly on device 254:2. devtmpfs: mounted Freeing unused kernel memory: 880K (ffffffff817d4000 - ffffffff818b0000) This is caused by the EFI earlyprintk code using __init functions, which are freed later: early_efi_write() is marked __init, and it also uses early_ioremap(), which is an __init function as well. To fix this, add an early initcall, early_efi_map_fb(), which maps the whole EFI framebuffer for later use, and add a wrapper function, early_efi_map(), which calls early_ioremap() for as long as ioremap() is not yet available. With this patch applied, EFI boots fine with earlyprintk=efi,keep console=efi. Signed-off-by: Dave Young Signed-off-by: Matt Fleming --- arch/x86/platform/efi/early_printk.c | 83 +++++++++++++++++++++++++++--------- 1 file changed, 64 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/efi/early_printk.c b/arch/x86/platform/efi/early_printk.c index 81b506d5befd..524142117296 100644 --- a/arch/x86/platform/efi/early_printk.c +++ b/arch/x86/platform/efi/early_printk.c @@ -14,48 +14,92 @@ static const struct font_desc *font; static u32 efi_x, efi_y; +static void *efi_fb; +static bool early_efi_keep; -static __init void early_efi_clear_scanline(unsigned int y) +/* + * efi earlyprintk need use early_ioremap to map the framebuffer. + * But early_ioremap is not usable for earlyprintk=efi,keep, ioremap should + * be used instead. ioremap will be available after paging_init() which is + * earlier than initcall callbacks. Thus adding this early initcall function + * early_efi_map_fb to map the whole efi framebuffer. + */ +static __init int early_efi_map_fb(void) { - unsigned long base, *dst; - u16 len; + unsigned long base, size; + + if (!early_efi_keep) + return 0; base = boot_params.screen_info.lfb_base; - len = boot_params.screen_info.lfb_linelength; + size = boot_params.screen_info.lfb_size; + efi_fb = ioremap(base, size); + + return efi_fb ? 0 : -ENOMEM; +} +early_initcall(early_efi_map_fb); + +/* + * early_efi_map maps efi framebuffer region [start, start + len -1] + * In case earlyprintk=efi,keep we have the whole framebuffer mapped already + * so just return the offset efi_fb + start. 
+ */ +static __init_refok void *early_efi_map(unsigned long start, unsigned long len) +{ + unsigned long base; + + base = boot_params.screen_info.lfb_base; + + if (efi_fb) + return (efi_fb + start); + else + return early_ioremap(base + start, len); +} - dst = early_ioremap(base + y*len, len); +static __init_refok void early_efi_unmap(void *addr, unsigned long len) +{ + if (!efi_fb) + early_iounmap(addr, len); +} + +static void early_efi_clear_scanline(unsigned int y) +{ + unsigned long *dst; + u16 len; + + len = boot_params.screen_info.lfb_linelength; + dst = early_efi_map(y*len, len); if (!dst) return; memset(dst, 0, len); - early_iounmap(dst, len); + early_efi_unmap(dst, len); } -static __init void early_efi_scroll_up(void) +static void early_efi_scroll_up(void) { - unsigned long base, *dst, *src; + unsigned long *dst, *src; u16 len; u32 i, height; - base = boot_params.screen_info.lfb_base; len = boot_params.screen_info.lfb_linelength; height = boot_params.screen_info.lfb_height; for (i = 0; i < height - font->height; i++) { - dst = early_ioremap(base + i*len, len); + dst = early_efi_map(i*len, len); if (!dst) return; - src = early_ioremap(base + (i + font->height) * len, len); + src = early_efi_map((i + font->height) * len, len); if (!src) { - early_iounmap(dst, len); + early_efi_unmap(dst, len); return; } memmove(dst, src, len); - early_iounmap(src, len); - early_iounmap(dst, len); + early_efi_unmap(src, len); + early_efi_unmap(dst, len); } } @@ -79,16 +123,14 @@ static void early_efi_write_char(u32 *dst, unsigned char c, unsigned int h) } } -static __init void +static void early_efi_write(struct console *con, const char *str, unsigned int num) { struct screen_info *si; - unsigned long base; unsigned int len; const char *s; void *dst; - base = boot_params.screen_info.lfb_base; si = &boot_params.screen_info; len = si->lfb_linelength; @@ -109,7 +151,7 @@ early_efi_write(struct console *con, const char *str, unsigned int num) for (h = 0; h < font->height; h++) { unsigned int n, x; - dst = early_ioremap(base + (efi_y + h) * len, len); + dst = early_efi_map((efi_y + h) * len, len); if (!dst) return; @@ -123,7 +165,7 @@ early_efi_write(struct console *con, const char *str, unsigned int num) s++; } - early_iounmap(dst, len); + early_efi_unmap(dst, len); } num -= count; @@ -179,6 +221,9 @@ static __init int early_efi_setup(struct console *con, char *options) for (i = 0; i < (yres - efi_y) / font->height; i++) early_efi_scroll_up(); + /* early_console_register will unset CON_BOOT in case ,keep */ + if (!(con->flags & CON_BOOT)) + early_efi_keep = true; return 0; } -- cgit v1.2.3 From ac008fe0a3236729751ccde655c215b436dfdaeb Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 5 May 2014 15:23:35 -0700 Subject: x86, build: Don't get confused by local symbols arch/x86/crypto/sha1_avx2_x86_64_asm.S introduced _end as a local symbol, which broke the build under certain circumstances. Although the wisdom of _end as a local symbol can definitely be questioned, the build should not break for that reason. Thus, filter the output of nm to only get global symbols of appropriate type. Reported-by: Andy Lutomirski Cc: Chandramouli Narayanan Cc: Herbert Xu Signed-off-by: H. 
Peter Anvin Link: http://lkml.kernel.org/n/tip-uxm3j3w3odglcwhafwq5tjqu@git.kernel.org --- arch/x86/boot/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index abb9eba61b50..dbe8dd2fe247 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -71,7 +71,7 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) -sed-voffset := -e 's/^\([0-9a-fA-F]*\) . \(_text\|_end\)$$/\#define VO_\2 0x\1/p' +sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|_end\)$$/\#define VO_\2 0x\1/p' quiet_cmd_voffset = VOFFSET $@ cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@ @@ -80,7 +80,7 @@ targets += voffset.h $(obj)/voffset.h: vmlinux FORCE $(call if_changed,voffset) -sed-zoffset := -e 's/^\([0-9a-fA-F]*\) . \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p' +sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p' quiet_cmd_zoffset = ZOFFSET $@ cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@ -- cgit v1.2.3 From 2605fc216fa492f9e7c488bdc7f687cd6dcc703b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 2 May 2014 00:44:37 +0200 Subject: asmlinkage, x86: Add explicit __visible to arch/x86/* As requested by Linus add explicit __visible to the asmlinkage users. This marks all functions visible to assembler. Tree sweep for arch/x86/* Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1398984278-29319-3-git-send-email-andi@firstfloor.org Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/misc.c | 2 +- arch/x86/kernel/acpi/sleep.c | 2 +- arch/x86/kernel/apic/io_apic.c | 2 +- arch/x86/kernel/cpu/mcheck/therm_throt.c | 4 ++-- arch/x86/kernel/cpu/mcheck/threshold.c | 4 ++-- arch/x86/kernel/head32.c | 2 +- arch/x86/kernel/head64.c | 2 +- arch/x86/kernel/process_64.c | 2 +- arch/x86/kernel/smp.c | 2 +- arch/x86/kernel/traps.c | 6 +++--- arch/x86/kernel/vsmp_64.c | 6 +++--- arch/x86/kvm/x86.c | 2 +- arch/x86/lguest/boot.c | 4 ++-- arch/x86/math-emu/errors.c | 16 ++++++++-------- arch/x86/platform/olpc/olpc-xo1-pm.c | 2 +- arch/x86/power/hibernate_64.c | 2 +- arch/x86/xen/enlighten.c | 2 +- arch/x86/xen/irq.c | 6 +++--- 18 files changed, 34 insertions(+), 34 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 17684615374b..57ab74df7eea 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -354,7 +354,7 @@ static void parse_elf(void *output) free(phdrs); } -asmlinkage void *decompress_kernel(void *rmode, memptr heap, +asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap, unsigned char *input_data, unsigned long input_len, unsigned char *output, diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 3a2ae4c88948..31368207837c 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -31,7 +31,7 @@ static char temp_stack[4096]; * * Wrapper around acpi_enter_sleep_state() to be called by assmebly. 
*/ -acpi_status asmlinkage x86_acpi_enter_sleep_state(u8 state) +acpi_status asmlinkage __visible x86_acpi_enter_sleep_state(u8 state) { return acpi_enter_sleep_state(state); } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index d23aa82e7a7b..992060e09897 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2189,7 +2189,7 @@ void send_cleanup_vector(struct irq_cfg *cfg) cfg->move_in_progress = 0; } -asmlinkage void smp_irq_move_cleanup_interrupt(void) +asmlinkage __visible void smp_irq_move_cleanup_interrupt(void) { unsigned vector, me; diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index d921b7ee6595..36a1bb6d1ee0 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -429,14 +429,14 @@ static inline void __smp_thermal_interrupt(void) smp_thermal_vector(); } -asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) +asmlinkage __visible void smp_thermal_interrupt(struct pt_regs *regs) { entering_irq(); __smp_thermal_interrupt(); exiting_ack_irq(); } -asmlinkage void smp_trace_thermal_interrupt(struct pt_regs *regs) +asmlinkage __visible void smp_trace_thermal_interrupt(struct pt_regs *regs) { entering_irq(); trace_thermal_apic_entry(THERMAL_APIC_VECTOR); diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c index fe6b1c86645b..7245980186ee 100644 --- a/arch/x86/kernel/cpu/mcheck/threshold.c +++ b/arch/x86/kernel/cpu/mcheck/threshold.c @@ -24,14 +24,14 @@ static inline void __smp_threshold_interrupt(void) mce_threshold_vector(); } -asmlinkage void smp_threshold_interrupt(void) +asmlinkage __visible void smp_threshold_interrupt(void) { entering_irq(); __smp_threshold_interrupt(); exiting_ack_irq(); } -asmlinkage void smp_trace_threshold_interrupt(void) +asmlinkage __visible void smp_trace_threshold_interrupt(void) { entering_irq(); trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR); diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index c61a14a4a310..d6c1b9836995 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -29,7 +29,7 @@ static void __init i386_default_early_setup(void) reserve_ebda_region(); } -asmlinkage void __init i386_start_kernel(void) +asmlinkage __visible void __init i386_start_kernel(void) { sanitize_boot_params(&boot_params); diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 85126ccbdf6b..068054f4bf20 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -137,7 +137,7 @@ static void __init copy_bootdata(char *real_mode_data) } } -asmlinkage void __init x86_64_start_kernel(char * real_mode_data) +asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) { int i; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 9c0280f93d05..898d077617a9 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -52,7 +52,7 @@ asmlinkage extern void ret_from_fork(void); -asmlinkage DEFINE_PER_CPU(unsigned long, old_rsp); +__visible DEFINE_PER_CPU(unsigned long, old_rsp); /* Prints also some state that isn't saved in the pt_regs */ void __show_regs(struct pt_regs *regs, int all) diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 7c3a5a61f2e4..be8e1bde07aa 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -168,7 +168,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) * this function calls the 
'stop' function on all other CPUs in the system. */ -asmlinkage void smp_reboot_interrupt(void) +asmlinkage __visible void smp_reboot_interrupt(void) { ack_APIC_irq(); irq_enter(); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 57409f6b8c62..f73b5d435bdc 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -357,7 +357,7 @@ exit: * for scheduling or signal handling. The actual stack switch is done in * entry.S */ -asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) +asmlinkage __visible __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) { struct pt_regs *regs = eregs; /* Did already sync */ @@ -601,11 +601,11 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) #endif } -asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) +asmlinkage __visible void __attribute__((weak)) smp_thermal_interrupt(void) { } -asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void) +asmlinkage __visible void __attribute__((weak)) smp_threshold_interrupt(void) { } diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 5edc34b5b951..b99b9ad8540c 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -36,7 +36,7 @@ static int irq_routing_comply = 1; * and vice versa. */ -asmlinkage unsigned long vsmp_save_fl(void) +asmlinkage __visible unsigned long vsmp_save_fl(void) { unsigned long flags = native_save_fl(); @@ -56,7 +56,7 @@ __visible void vsmp_restore_fl(unsigned long flags) } PV_CALLEE_SAVE_REGS_THUNK(vsmp_restore_fl); -asmlinkage void vsmp_irq_disable(void) +asmlinkage __visible void vsmp_irq_disable(void) { unsigned long flags = native_save_fl(); @@ -64,7 +64,7 @@ asmlinkage void vsmp_irq_disable(void) } PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_disable); -asmlinkage void vsmp_irq_enable(void) +asmlinkage __visible void vsmp_irq_enable(void) { unsigned long flags = native_save_fl(); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8b8fc0b792ba..b6c0bacca9bd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -280,7 +280,7 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } EXPORT_SYMBOL_GPL(kvm_set_apic_base); -asmlinkage void kvm_spurious_fault(void) +asmlinkage __visible void kvm_spurious_fault(void) { /* Fault while not rebooting. We want the trace. */ BUG(); diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index ad1fb5f53925..aae94132bc24 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -233,13 +233,13 @@ static void lguest_end_context_switch(struct task_struct *next) * flags word contains all kind of stuff, but in practice Linux only cares * about the interrupt flag. Our "save_flags()" just returns that. */ -asmlinkage unsigned long lguest_save_fl(void) +asmlinkage __visible unsigned long lguest_save_fl(void) { return lguest_data.irq_enabled; } /* Interrupts go off... 
*/ -asmlinkage void lguest_irq_disable(void) +asmlinkage __visible void lguest_irq_disable(void) { lguest_data.irq_enabled = 0; } diff --git a/arch/x86/math-emu/errors.c b/arch/x86/math-emu/errors.c index a5449089cd9f..9e6545f269e5 100644 --- a/arch/x86/math-emu/errors.c +++ b/arch/x86/math-emu/errors.c @@ -302,7 +302,7 @@ static struct { 0x242 in div_Xsig.S */ -asmlinkage void FPU_exception(int n) +asmlinkage __visible void FPU_exception(int n) { int i, int_type; @@ -492,7 +492,7 @@ int real_2op_NaN(FPU_REG const *b, u_char tagb, /* Invalid arith operation on Valid registers */ /* Returns < 0 if the exception is unmasked */ -asmlinkage int arith_invalid(int deststnr) +asmlinkage __visible int arith_invalid(int deststnr) { EXCEPTION(EX_Invalid); @@ -507,7 +507,7 @@ asmlinkage int arith_invalid(int deststnr) } /* Divide a finite number by zero */ -asmlinkage int FPU_divide_by_zero(int deststnr, u_char sign) +asmlinkage __visible int FPU_divide_by_zero(int deststnr, u_char sign) { FPU_REG *dest = &st(deststnr); int tag = TAG_Valid; @@ -539,7 +539,7 @@ int set_precision_flag(int flags) } /* This may be called often, so keep it lean */ -asmlinkage void set_precision_flag_up(void) +asmlinkage __visible void set_precision_flag_up(void) { if (control_word & CW_Precision) partial_status |= (SW_Precision | SW_C1); /* The masked response */ @@ -548,7 +548,7 @@ asmlinkage void set_precision_flag_up(void) } /* This may be called often, so keep it lean */ -asmlinkage void set_precision_flag_down(void) +asmlinkage __visible void set_precision_flag_down(void) { if (control_word & CW_Precision) { /* The masked response */ partial_status &= ~SW_C1; @@ -557,7 +557,7 @@ asmlinkage void set_precision_flag_down(void) EXCEPTION(EX_Precision); } -asmlinkage int denormal_operand(void) +asmlinkage __visible int denormal_operand(void) { if (control_word & CW_Denormal) { /* The masked response */ partial_status |= SW_Denorm_Op; @@ -568,7 +568,7 @@ asmlinkage int denormal_operand(void) } } -asmlinkage int arith_overflow(FPU_REG *dest) +asmlinkage __visible int arith_overflow(FPU_REG *dest) { int tag = TAG_Valid; @@ -596,7 +596,7 @@ asmlinkage int arith_overflow(FPU_REG *dest) } -asmlinkage int arith_underflow(FPU_REG *dest) +asmlinkage __visible int arith_underflow(FPU_REG *dest) { int tag = TAG_Valid; diff --git a/arch/x86/platform/olpc/olpc-xo1-pm.c b/arch/x86/platform/olpc/olpc-xo1-pm.c index ff0174dda810..a9acde72d4ed 100644 --- a/arch/x86/platform/olpc/olpc-xo1-pm.c +++ b/arch/x86/platform/olpc/olpc-xo1-pm.c @@ -75,7 +75,7 @@ static int xo1_power_state_enter(suspend_state_t pm_state) return 0; } -asmlinkage int xo1_do_sleep(u8 sleep_state) +asmlinkage __visible int xo1_do_sleep(u8 sleep_state) { void *pgd_addr = __va(read_cr3()); diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index 304fca20d96e..35e2bb6c0f37 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -23,7 +23,7 @@ extern __visible const void __nosave_begin, __nosave_end; /* Defined in hibernate_asm_64.S */ -extern asmlinkage int restore_image(void); +extern asmlinkage __visible int restore_image(void); /* * Address to jump to in the last phase of restore in order to get to the image diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 201d09a7c46b..c34bfc4bbe7f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1515,7 +1515,7 @@ static void __init xen_pvh_early_guest_init(void) } /* First C function to be called on Xen boot */ -asmlinkage void __init 
xen_start_kernel(void) +asmlinkage __visible void __init xen_start_kernel(void) { struct physdev_set_iopl set_iopl; int rc; diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 08f763de26fe..a1207cb6472a 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -23,7 +23,7 @@ void xen_force_evtchn_callback(void) (void)HYPERVISOR_xen_version(0, NULL); } -asmlinkage unsigned long xen_save_fl(void) +asmlinkage __visible unsigned long xen_save_fl(void) { struct vcpu_info *vcpu; unsigned long flags; @@ -63,7 +63,7 @@ __visible void xen_restore_fl(unsigned long flags) } PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl); -asmlinkage void xen_irq_disable(void) +asmlinkage __visible void xen_irq_disable(void) { /* There's a one instruction preempt window here. We need to make sure we're don't switch CPUs between getting the vcpu @@ -74,7 +74,7 @@ asmlinkage void xen_irq_disable(void) } PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable); -asmlinkage void xen_irq_enable(void) +asmlinkage __visible void xen_irq_enable(void) { struct vcpu_info *vcpu; -- cgit v1.2.3 From aadca6fa4068ad1f92c492bc8507b7ed350825a2 Mon Sep 17 00:00:00 2001 From: Christian Gmeiner Date: Wed, 7 May 2014 09:01:54 +0200 Subject: x86/reboot: Add reboot quirk for Certec BPC600 Certec BPC600 needs reboot=pci to actually reboot. Signed-off-by: Christian Gmeiner Cc: Matthew Garrett Cc: Li Aubrey Cc: Andrew Morton Cc: Dave Jones Cc: Fenghua Yu Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1399446114-2147-1-git-send-email-christian.gmeiner@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 3399d3a99730..52b1157c53eb 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -191,6 +191,16 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { }, }, + /* Certec */ + { /* Handle problems with rebooting on Certec BPC600 */ + .callback = set_pci_reboot, + .ident = "Certec BPC600", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Certec"), + DMI_MATCH(DMI_PRODUCT_NAME, "BPC600"), + }, + }, + /* Dell */ { /* Handle problems with rebooting on Dell DXP061 */ .callback = set_bios_reboot, -- cgit v1.2.3 From 696dfd95ba9838327a7013e5988ff3ba60dcc8c8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 7 May 2014 11:20:54 +0200 Subject: KVM: vmx: disable APIC virtualization in nested guests While running a nested guest, we should disable APIC virtualization controls (virtualized APIC register accesses, virtual interrupt delivery and posted interrupts), because we do not expose them to the nested guest. 
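Reduced to its essentials, the change masks three APIC-virtualization bits out of the pin-based and secondary controls merged for the nested guest. The standalone C sketch below shows just that masking pattern; the constants are restated here with their architectural bit positions rather than taken from the kernel's arch/x86/include/asm/vmx.h, and all of the surrounding VMCS plumbing is elided.

#include <stdint.h>

/* Architectural bit positions, restated for this sketch. */
#define PIN_BASED_POSTED_INTR                    (1u << 7)
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES  (1u << 0)
#define SECONDARY_EXEC_APIC_REGISTER_VIRT        (1u << 8)
#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY     (1u << 9)

/*
 * When merging controls for the nested (L2) guest, strip every
 * APIC-virtualization feature that is not emulated for L1, so L2
 * never runs with controls its own hypervisor cannot observe.
 */
static void mask_apicv_for_nested(uint32_t *pin_ctls, uint32_t *sec_ctls)
{
	*pin_ctls &= ~PIN_BASED_POSTED_INTR;
	*sec_ctls &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
		       SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
		       SECONDARY_EXEC_APIC_REGISTER_VIRT);
}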
Reported-by: Hu Yaohui Suggested-by: Abel Gordon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 33e8c028842f..138ceffc6377 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7778,7 +7778,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) exec_control = vmcs12->pin_based_vm_exec_control; exec_control |= vmcs_config.pin_based_exec_ctrl; - exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER; + exec_control &= ~(PIN_BASED_VMX_PREEMPTION_TIMER | + PIN_BASED_POSTED_INTR); vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control); vmx->nested.preemption_timer_expired = false; @@ -7815,7 +7816,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) if (!vmx->rdtscp_enabled) exec_control &= ~SECONDARY_EXEC_RDTSCP; /* Take the following fields only from vmcs12 */ - exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | + SECONDARY_EXEC_APIC_REGISTER_VIRT); if (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) exec_control |= vmcs12->secondary_vm_exec_control; -- cgit v1.2.3 From 14262d67fe348018af368a07430fbc06eadeabb1 Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Wed, 7 May 2014 17:05:52 -0400 Subject: x86-64, build: Fix stack protector Makefile breakage with 32-bit userland If you are using a 64-bit kernel with 32-bit userland, then scripts/gcc-x86_64-has-stack-protector.sh invokes 32-bit gcc with -mcmodel=kernel, which produces: <stdin>:1:0: error: code model 'kernel' not supported in the 32 bit mode and trips the "broken compiler" test at arch/x86/Makefile:120. There are several places a fix is possible, but the following seems cleanest. (But it's minimal; it would also be possible to factor out a bunch of stuff from the two branches of the if.) Signed-off-by: George Spelvin Link: http://lkml.kernel.org/r/20140507210552.7581.qmail@ns.horizon.com Cc: # v3.14 Signed-off-by: H. Peter Anvin --- arch/x86/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/Makefile b/arch/x86/Makefile index ce6ad7e6a7d7..33f71b01fd22 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -79,6 +79,7 @@ else UTS_MACHINE := x86_64 CHECKFLAGS += -D__x86_64__ -m64 + biarch := -m64 KBUILD_AFLAGS += -m64 KBUILD_CFLAGS += -m64 -- cgit v1.2.3 From f10f383d8414bfe3357e24432ed8a26eeb58ffb8 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Thu, 24 Apr 2014 16:18:17 +0800 Subject: x86/hpet: Make boot_hpet_disable extern HPET on some platforms has accuracy problems. Make "boot_hpet_disable" extern so that we can disable the HPET timer at runtime from a quirk that checks the platform.
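In isolation, the pattern is just an extern flag plus an early quirk that sets it; a rough standalone sketch follows (quirk_disable_hpet() is a hypothetical hook, and all surrounding code is elided):

/* arch/x86/include/asm/hpet.h: expose the command-line flag */
extern int boot_hpet_disable;

/* arch/x86/kernel/hpet.c: no longer static, so quirk code can set it */
int boot_hpet_disable;

/* A hypothetical early platform quirk can then disable the HPET at
 * runtime, before hpet_enable() ever touches the hardware. */
static void quirk_disable_hpet(void)
{
	boot_hpet_disable = 1;
}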
Signed-off-by: Feng Tang Cc: Clemens Ladisch Cc: John Stultz Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1398327498-13163-1-git-send-email-feng.tang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hpet.h | 1 + arch/x86/kernel/hpet.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index b18df579c0e9..36f7125945e3 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -63,6 +63,7 @@ /* hpet memory map physical address */ extern unsigned long hpet_address; extern unsigned long force_hpet_address; +extern int boot_hpet_disable; extern u8 hpet_blockid; extern int hpet_force_user; extern u8 hpet_msi_disable; diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 8d80ae011603..4177bfbc80b0 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -88,7 +88,7 @@ static inline void hpet_clear_mapping(void) /* * HPET command line enable / disable */ -static int boot_hpet_disable; +int boot_hpet_disable; int hpet_force_user; static int hpet_verbose; -- cgit v1.2.3 From 62187910b0fc7a75cfec9c30fda58ce2f39d689b Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Thu, 24 Apr 2014 16:18:18 +0800 Subject: x86/intel: Add quirk to disable HPET for the Baytrail platform HPET on the current Baytrail platform has accuracy problems that keep it from being used as a reliable clocksource/clockevent, so add an early quirk to disable it. Signed-off-by: Feng Tang Cc: Clemens Ladisch Cc: John Stultz Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1398327498-13163-2-git-send-email-feng.tang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/early-quirks.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 6e2537c32190..6cda0baeac9d 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -530,6 +531,15 @@ static void __init intel_graphics_stolen(int num, int slot, int func) } } +static void __init force_disable_hpet(int num, int slot, int func) +{ +#ifdef CONFIG_HPET_TIMER + boot_hpet_disable = 1; + pr_info("x86/hpet: Will disable the HPET for this platform because it's not reliable\n"); +#endif +} + + #define QFLAG_APPLY_ONCE 0x1 #define QFLAG_APPLIED 0x2 #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) @@ -567,6 +577,12 @@ static struct chipset early_qrk[] __initdata = { PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check }, { PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA, PCI_ANY_ID, QFLAG_APPLY_ONCE, intel_graphics_stolen }, + /* + * HPET on current version of Baytrail platform has accuracy + * problems, disable it for now: + */ + { PCI_VENDOR_ID_INTEL, 0x0f00, + PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, {} }; -- cgit v1.2.3 From 722a0d22d028bd74061cf582de1764884e73674f Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Fri, 9 May 2014 03:29:16 +0200 Subject: x86: Fix typo preventing msr_set/clear_bit from having an effect Due to a typo, the MSR accessor functions introduced in 22085a66c2fab6cf9b9393c056a3600a6b4735de didn't have any lasting effect, because they accidentally wrote the old value back. After c0a639ad0bc6b178b46996bd1f821a04643e2bde this, at the very least, causes CPUID limits not to be lifted on some CPUs, leading to missing capabilities on those.
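The fix is a single character in a read-modify-write helper. A simplified, self-contained version of that helper is sketched below; msr_read()/msr_write() are hypothetical stand-ins for the kernel's safe MSR accessors, backed here by one fake register so the sketch compiles on its own.

#include <stdbool.h>
#include <stdint.h>

struct msr { uint64_t q; };

/* Hypothetical stand-ins for the real MSR read/write primitives. */
static uint64_t fake_msr_storage;
static int msr_read(uint32_t msr, struct msr *m)
{
	(void)msr; m->q = fake_msr_storage; return 0;
}
static int msr_write(uint32_t msr, struct msr *m)
{
	(void)msr; fake_msr_storage = m->q; return 0;
}

static int flip_bit(uint32_t msr, uint8_t bit, bool set)
{
	struct msr m, m1;

	if (msr_read(msr, &m))
		return -1;

	m1 = m;				/* modified copy */
	if (set)
		m1.q |= 1ULL << bit;
	else
		m1.q &= ~(1ULL << bit);

	if (m1.q == m.q)		/* bit already in the wanted state */
		return 0;

	/* The typo fixed above passed &m (the unmodified value) here,
	 * silently restoring the old contents; the modified copy m1 is
	 * what must reach the MSR. */
	return msr_write(msr, &m1);
}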
Signed-off-by: Andres Freund Link: http://lkml.kernel.org/r/1399598957-7011-2-git-send-email-andres@anarazel.de Cc: Borislav Petkov Signed-off-by: H. Peter Anvin --- arch/x86/lib/msr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index db9db446b71a..43623739c7cf 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c @@ -76,7 +76,7 @@ static inline int __flip_bit(u32 msr, u8 bit, bool set) if (m1.q == m.q) return 0; - err = msr_write(msr, &m); + err = msr_write(msr, &m1); if (err) return err; -- cgit v1.2.3 From c45f77364ba060395b7eff1bf45e6c537f913380 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Fri, 9 May 2014 03:29:17 +0200 Subject: x86: Fix typo in MSR_IA32_MISC_ENABLE_LIMIT_CPUID macro The spuriously added semicolon didn't have any effect because the macro isn't currently in use. c0a639ad0bc6b178b46996bd1f821a04643e2bde Signed-off-by: Andres Freund Link: http://lkml.kernel.org/r/1399598957-7011-3-git-send-email-andres@anarazel.de Cc: Borislav Petkov Signed-off-by: H. Peter Anvin --- arch/x86/include/uapi/asm/msr-index.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index c827ace3121b..fcf2b3ae1bf0 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -384,7 +384,7 @@ #define MSR_IA32_MISC_ENABLE_MWAIT_BIT 18 #define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << MSR_IA32_MISC_ENABLE_MWAIT_BIT) #define MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT 22 -#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT); +#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) #define MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT 23 #define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT) #define MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT 34 -- cgit v1.2.3 From 28b92e09e25bdc0ae864b22eacf195a74f861389 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Fri, 9 May 2014 11:11:27 -0400 Subject: x86, vdso, time: Cast tv_nsec to u64 for proper shifting in update_vsyscall() With tk->wall_to_monotonic.tv_nsec being a 32-bit value on 32-bit systems, (tk->wall_to_monotonic.tv_nsec << tk->shift) in update_vsyscall() may lose upper bits or, worse, add them since compiler will do this: (u64)(tk->wall_to_monotonic.tv_nsec << tk->shift) instead of ((u64)tk->wall_to_monotonic.tv_nsec << tk->shift) So if, for example, tv_nsec is 0x800000 and shift is 8 we will end up with 0xffffffff80000000 instead of 0x80000000. And then we are stuck in the subsequent 'while' loop. We need an explicit cast. Signed-off-by: Boris Ostrovsky Link: http://lkml.kernel.org/r/1399648287-15178-1-git-send-email-boris.ostrovsky@oracle.com Acked-by: Konrad Rzeszutek Wilk Cc: # v3.14 Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/vsyscall_gtod.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c index f9c6e56e14b5..9531fbb123ba 100644 --- a/arch/x86/kernel/vsyscall_gtod.c +++ b/arch/x86/kernel/vsyscall_gtod.c @@ -43,7 +43,7 @@ void update_vsyscall(struct timekeeper *tk) vdata->monotonic_time_sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; vdata->monotonic_time_snsec = tk->xtime_nsec - + (tk->wall_to_monotonic.tv_nsec + + ((u64)tk->wall_to_monotonic.tv_nsec << tk->shift); while (vdata->monotonic_time_snsec >= (((u64)NSEC_PER_SEC) << tk->shift)) { -- cgit v1.2.3 From 7a5091d58419b4e5222abce58a40c072786ea1d6 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Sun, 11 May 2014 20:25:20 -0700 Subject: x86, rdrand: When nordrand is specified, disable RDSEED as well One can logically expect that when the user has specified "nordrand", the user doesn't want any use of the CPU random number generator, neither RDRAND nor RDSEED, so disable both. Reported-by: Stephan Mueller Cc: Theodore Ts'o Link: http://lkml.kernel.org/r/21542339.0lFnPSyGRS@myon.chronox.de Signed-off-by: H. Peter Anvin --- Documentation/kernel-parameters.txt | 8 ++++---- arch/x86/kernel/cpu/rdrand.c | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 43842177b771..30a8ad0dae53 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2218,10 +2218,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. noreplace-smp [X86-32,SMP] Don't replace SMP instructions with UP alternatives - nordrand [X86] Disable the direct use of the RDRAND - instruction even if it is supported by the - processor. RDRAND is still available to user - space applications. + nordrand [X86] Disable kernel use of the RDRAND and + RDSEED instructions even if they are supported + by the processor. RDRAND and RDSEED are still + available to user space applications. noresume [SWSUSP] Disables resume and restores original swap space. diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c index 384df5105fbc..136ac74dee82 100644 --- a/arch/x86/kernel/cpu/rdrand.c +++ b/arch/x86/kernel/cpu/rdrand.c @@ -27,6 +27,7 @@ static int __init x86_rdrand_setup(char *s) { setup_clear_cpu_cap(X86_FEATURE_RDRAND); + setup_clear_cpu_cap(X86_FEATURE_RDSEED); return 1; } __setup("nordrand", x86_rdrand_setup); -- cgit v1.2.3 From 773cd38f40b8834be991dbfed36683acc1dd41ee Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 13 May 2014 15:05:55 -0700 Subject: net: filter: x86: fix JIT address randomization bpf_alloc_binary() adds 128 bytes of room to JITed program image and rounds it up to the nearest page size. If image size is close to page size (like 4000), it is rounded to two pages: round_up(4000 + 4 + 128) == 8192 then 'hole' is computed as 8192 - (4000 + 4) = 4188 If prandom_u32() % hole selects a number >= PAGE_SIZE - sizeof(*header) then kernel will crash during bpf_jit_free(): kernel BUG at arch/x86/mm/pageattr.c:887! Call Trace: [] change_page_attr_set_clr+0x135/0x460 [] ? _raw_spin_unlock_irq+0x30/0x50 [] set_memory_rw+0x2f/0x40 [] bpf_jit_free_deferred+0x2d/0x60 [] process_one_work+0x1d8/0x6a0 [] ? 
process_one_work+0x178/0x6a0 [] worker_thread+0x11c/0x370 since bpf_jit_free() does: unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; struct bpf_binary_header *header = (void *)addr; to compute start address of 'bpf_binary_header' and header->pages will pass junk to: set_memory_rw(addr, header->pages); Fix it by making sure that &header->image[prandom_u32() % hole] and &header are in the same page Fixes: 314beb9bcabfd ("x86: bpf_jit_comp: secure bpf jit against spraying attacks") Signed-off-by: Alexei Starovoitov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- arch/x86/net/bpf_jit_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index dc017735bb91..6d5663a599a7 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -171,7 +171,7 @@ static struct bpf_binary_header *bpf_alloc_binary(unsigned int proglen, memset(header, 0xcc, sz); /* fill whole space with int3 instructions */ header->pages = sz / PAGE_SIZE; - hole = sz - (proglen + sizeof(*header)); + hole = min(sz - (proglen + sizeof(*header)), PAGE_SIZE - sizeof(*header)); /* insert a random number of int3 instructions before BPF code */ *image_ptr = &header->image[prandom_u32() % hole]; -- cgit v1.2.3 From 9844f5462392b53824e8b86726e7c33b5ecbb676 Mon Sep 17 00:00:00 2001 From: Anthony Iliopoulos Date: Wed, 14 May 2014 11:29:48 +0200 Subject: x86, mm, hugetlb: Add missing TLB page invalidation for hugetlb_cow() The invalidation is required in order to maintain proper semantics under CoW conditions. In scenarios where a process clones several threads, a thread operating on a core whose DTLB entry for a particular hugepage has not been invalidated, will be reading from the hugepage that belongs to the forked child process, even after hugetlb_cow(). The thread will not see the updated page as long as the stale DTLB entry remains cached, the thread attempts to write into the page, the child process exits, or the thread gets migrated to a different processor. Signed-off-by: Anthony Iliopoulos Link: http://lkml.kernel.org/r/20140514092948.GA17391@server-36.huawei.corp Suggested-by: Shay Goikhman Acked-by: Dave Hansen Signed-off-by: H. Peter Anvin Cc: # v2.6.16+ (!) --- arch/x86/include/asm/hugetlb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h index a8091216963b..68c05398bba9 100644 --- a/arch/x86/include/asm/hugetlb.h +++ b/arch/x86/include/asm/hugetlb.h @@ -52,6 +52,7 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { + ptep_clear_flush(vma, addr, ptep); } static inline int huge_pte_none(pte_t pte) -- cgit v1.2.3 From 16a9602158861687c78b6de6dc6a79e6e8a9136f Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 14 May 2014 12:43:24 -0300 Subject: KVM: x86: disable master clock if TSC is reset during suspend Updating system_time from the kernel clock once master clock has been enabled can result in time backwards event, in case kernel clock frequency is lower than TSC frequency. Disable master clock in case it is necessary to update it from the resume path. 
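Reduced to its core, the patch adds a sticky flag that is set on the resume path and folded into the master-clock predicate. A condensed standalone sketch (the flag and field names follow the patch; the surrounding KVM state is elided):

#include <stdbool.h>

static bool backwards_tsc_observed;	/* sticky: set once, never cleared */

struct kvm_arch_clock { bool use_master_clock; };

/* Recomputed whenever the pvclock copy is refreshed: the master clock
 * may only be used while the host TSC is the clocksource, all vCPUs
 * match, and no backwards TSC has ever been seen. */
static void update_master_clock(struct kvm_arch_clock *ka,
				bool host_tsc_clocksource, bool vcpus_matched)
{
	ka->use_master_clock = host_tsc_clocksource && vcpus_matched &&
			       !backwards_tsc_observed;
}

/* Resume path: a TSC that went backwards across suspend disqualifies
 * the master clock for the remainder of this boot. */
static void note_backwards_tsc(void)
{
	backwards_tsc_observed = true;
}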
Signed-off-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8b8fc0b792ba..84a2d4152a63 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -106,6 +106,8 @@ EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz); static u32 tsc_tolerance_ppm = 250; module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); +static bool backwards_tsc_observed = false; + #define KVM_NR_SHARED_MSRS 16 struct kvm_shared_msrs_global { @@ -1486,7 +1488,8 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm) &ka->master_kernel_ns, &ka->master_cycle_now); - ka->use_master_clock = host_tsc_clocksource & vcpus_matched; + ka->use_master_clock = host_tsc_clocksource && vcpus_matched + && !backwards_tsc_observed; if (ka->use_master_clock) atomic_set(&kvm_guest_has_master_clock, 1); @@ -6945,6 +6948,7 @@ int kvm_arch_hardware_enable(void *garbage) */ if (backwards_tsc) { u64 delta_cyc = max_tsc - local_tsc; + backwards_tsc_observed = true; list_for_each_entry(kvm, &vm_list, vm_list) { kvm_for_each_vcpu(i, vcpu, kvm) { vcpu->arch.tsc_offset_adjustment += delta_cyc; -- cgit v1.2.3 From fa81511bb0bbb2b1aace3695ce869da9762624ff Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 14 May 2014 16:33:54 -0700 Subject: x86-64, modify_ldt: Make support for 16-bit segments a runtime option Checkin: b3b42ac2cbae x86-64, modify_ldt: Ban 16-bit segments on 64-bit kernels disabled 16-bit segments on 64-bit kernels due to an information leak. However, it does seem that people are genuinely using Wine to run old 16-bit Windows programs on Linux. A proper fix for this ("espfix64") is coming in the upcoming merge window, but as a temporary fix, create a sysctl to allow the administrator to re-enable support for 16-bit segments. It adds a "/proc/sys/abi/ldt16" sysctl that defaults to zero (off). If you hit this issue and care about your old Windows program more than you care about a kernel stack address information leak, you can do echo 1 > /proc/sys/abi/ldt16 as root (add it to your startup scripts), and you should be ok. The sysctl table is only added if you have COMPAT support enabled on x86-64, but I assume anybody who runs old windows binaries very much does that ;) Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/CA%2B55aFw9BPoD10U1LfHbOMpHWZkvJTkMcfCs9s3urPr1YyWBxw@mail.gmail.com Cc: --- arch/x86/kernel/ldt.c | 4 +++- arch/x86/vdso/vdso32-setup.c | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index af1d14a9ebda..dcbbaa165bde 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -20,6 +20,8 @@ #include #include +int sysctl_ldt16 = 0; + #ifdef CONFIG_SMP static void flush_ldt(void *current_mm) { @@ -234,7 +236,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) * IRET leaking the high bits of the kernel stack address. 
*/ #ifdef CONFIG_X86_64 - if (!ldt_info.seg_32bit) { + if (!ldt_info.seg_32bit && !sysctl_ldt16) { error = -EINVAL; goto out_unlock; } diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 00348980a3a6..e1f220e3ca68 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -39,6 +39,7 @@ #ifdef CONFIG_X86_64 #define vdso_enabled sysctl_vsyscall32 #define arch_setup_additional_pages syscall32_setup_pages +extern int sysctl_ldt16; #endif /* @@ -249,6 +250,13 @@ static struct ctl_table abi_table2[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "ldt16", + .data = &sysctl_ldt16, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, {} }; -- cgit v1.2.3 From 6538b8ea886e472f4431db8ca1d60478f838d14b Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 28 May 2014 15:53:59 +0900 Subject: x86_64: expand kernel stack to 16K While testing in-house patches under heavy memory pressure on qemu-kvm, I saw the 3.14 kernel crash randomly. The reason was a kernel stack overflow. When I investigated the problem, the call stack was somewhat deeper because reclaim functions were involved, though not via the direct reclaim path. I tried to put some alloc/reclaim-related functions on a stack diet and saved a few hundred bytes, but the overflow didn't disappear; I just hit another overflow through a different, deeper call stack on the reclaim/allocator path. Of course, we could sweep every site we have found to reduce stack usage, but I'm not sure how long that would save the world (surely, lots of developers will keep adding nice features that use the stack again), and if we consider more complex features in the I/O layer and/or the reclaim path, it might be better to increase the stack size. (Meanwhile, stack usage on 64-bit machines has doubled compared to 32-bit while the stack size has stayed at 8K. Hmm, that doesn't seem fair, and arm64 has already expanded to 16K.) So my simple idea is: let's expand the stack size and keep an eye on the stack consumption of each kernel function via ftrace stack traces. For example, we could set a bar such that no function should exceed 200K, and emit a warning when some function consumes more at runtime. Of course, this could produce false positives, but at least it would create a chance to think it over. I guess this topic has been discussed several times, so there might be a strong reason not to increase the kernel stack size on x86_64; since I don't know it, I'm Ccing the x86_64 maintainers, other MM folks and the virtio maintainers. A minimal sketch of the kind of runtime stack check meant here follows below.
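A userspace-flavored sketch of such a check: it estimates consumed stack from a local variable's address measured against a recorded base, similar in spirit to the kernel's CONFIG_DEBUG_STACK_USAGE accounting. The 2048-byte bar and both helper names are arbitrary placeholders.

#include <stdint.h>
#include <stdio.h>

#define STACK_WARN_BAR	2048		/* arbitrary placeholder budget */

static uintptr_t stack_base;		/* recorded at thread entry */

static void note_stack_base(void)
{
	int anchor;
	stack_base = (uintptr_t)&anchor;
}

/* Call from deep paths, after note_stack_base(): the stack grows down,
 * so the distance from the recorded base to a fresh local variable
 * approximates the bytes consumed so far. */
static void check_stack_usage(const char *where)
{
	int anchor;
	uintptr_t used = stack_base - (uintptr_t)&anchor;

	if (used > STACK_WARN_BAR)
		fprintf(stderr, "%s: ~%lu bytes of stack used\n",
			where, (unsigned long)used);
}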
Here's an example call trace using up the kernel stack:

        Depth    Size   Location    (51 entries)
        -----    ----   --------
  0)     7696      16   lookup_address
  1)     7680      16   _lookup_address_cpa.isra.3
  2)     7664      24   __change_page_attr_set_clr
  3)     7640     392   kernel_map_pages
  4)     7248     256   get_page_from_freelist
  5)     6992     352   __alloc_pages_nodemask
  6)     6640       8   alloc_pages_current
  7)     6632     168   new_slab
  8)     6464       8   __slab_alloc
  9)     6456      80   __kmalloc
 10)     6376     376   vring_add_indirect
 11)     6000     144   virtqueue_add_sgs
 12)     5856     288   __virtblk_add_req
 13)     5568      96   virtio_queue_rq
 14)     5472     128   __blk_mq_run_hw_queue
 15)     5344      16   blk_mq_run_hw_queue
 16)     5328      96   blk_mq_insert_requests
 17)     5232     112   blk_mq_flush_plug_list
 18)     5120     112   blk_flush_plug_list
 19)     5008      64   io_schedule_timeout
 20)     4944     128   mempool_alloc
 21)     4816      96   bio_alloc_bioset
 22)     4720      48   get_swap_bio
 23)     4672     160   __swap_writepage
 24)     4512      32   swap_writepage
 25)     4480     320   shrink_page_list
 26)     4160     208   shrink_inactive_list
 27)     3952     304   shrink_lruvec
 28)     3648      80   shrink_zone
 29)     3568     128   do_try_to_free_pages
 30)     3440     208   try_to_free_pages
 31)     3232     352   __alloc_pages_nodemask
 32)     2880       8   alloc_pages_current
 33)     2872     200   __page_cache_alloc
 34)     2672      80   find_or_create_page
 35)     2592      80   ext4_mb_load_buddy
 36)     2512     176   ext4_mb_regular_allocator
 37)     2336     128   ext4_mb_new_blocks
 38)     2208     256   ext4_ext_map_blocks
 39)     1952     160   ext4_map_blocks
 40)     1792     384   ext4_writepages
 41)     1408      16   do_writepages
 42)     1392      96   __writeback_single_inode
 43)     1296     176   writeback_sb_inodes
 44)     1120      80   __writeback_inodes_wb
 45)     1040     160   wb_writeback
 46)      880     208   bdi_writeback_workfn
 47)      672     144   process_one_work
 48)      528     112   worker_thread
 49)      416     240   kthread
 50)      176     176   ret_from_fork

[ Note: the problem is exacerbated by certain gcc versions that seem to generate much bigger stack frames due to apparently bad coalescing of temporaries and generating too many spills. Rusty saw gcc-4.6.4 using 35% more stack on the virtio path than 4.8.2 does, for example. Minchan not only uses such a bad gcc version (4.6.3 in his case), but some of the stack use is due to debugging (CONFIG_DEBUG_PAGEALLOC is what causes that kernel_map_pages() frame, for example). But we're clearly getting too close. The VM code also seems to have excessive stack frames partly for the same compiler reason, triggered by excessive inlining and lots of function arguments. We need to improve on our stack use, but in the meantime let's do this simple stack increase too. Unlike most earlier reports, there is nothing simple that stands out as being really horribly wrong here, apart from the fact that the stack frames are just bigger than they should need to be. - Linus ]

Signed-off-by: Minchan Kim Cc: Peter Anvin Cc: Dave Chinner Cc: Dave Jones Cc: Jens Axboe Cc: Andrew Morton Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Mel Gorman Cc: Rik van Riel Cc: Johannes Weiner Cc: Hugh Dickins Cc: Rusty Russell Cc: Michael S Tsirkin Cc: Dave Hansen Cc: Steven Rostedt Cc: PJ Waskiewicz Signed-off-by: Linus Torvalds --- arch/x86/include/asm/page_64_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 8de6d9cf3b95..678205195ae1 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -1,7 +1,7 @@ #ifndef _ASM_X86_PAGE_64_DEFS_H #define _ASM_X86_PAGE_64_DEFS_H -#define THREAD_SIZE_ORDER 1 +#define THREAD_SIZE_ORDER 2 #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) #define CURRENT_MASK (~(THREAD_SIZE - 1)) -- cgit v1.2.3
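For reference, the arithmetic behind that one-line change, spelled out as a standalone program (4 KiB pages assumed, as on x86-64):

#include <stdio.h>

#define PAGE_SIZE          4096UL
#define THREAD_SIZE_ORDER  2		/* was 1 before the patch */
#define THREAD_SIZE        (PAGE_SIZE << THREAD_SIZE_ORDER)
#define CURRENT_MASK       (~(THREAD_SIZE - 1))

int main(void)
{
	/* 16384 bytes (16K), up from 8192 (8K) at order 1: */
	printf("THREAD_SIZE  = %lu\n", THREAD_SIZE);
	/* CURRENT_MASK aligns a stack pointer down to the stack base: */
	printf("CURRENT_MASK = %#lx\n", CURRENT_MASK);
	return 0;
}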