Diffstat (limited to 'arch/x86')
204 files changed, 6528 insertions, 7610 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6c30b9e93d8c..4742ddff464f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -52,6 +52,7 @@ config X86
 	select HAVE_HW_BREAKPOINT
 	select HAVE_MIXED_BREAKPOINTS_REGS
 	select PERF_EVENTS
+	select HAVE_PERF_EVENTS_NMI
 	select ANON_INODES
 	select HAVE_ARCH_KMEMCHECK
 	select HAVE_USER_RETURN_NOTIFIER
@@ -69,9 +70,6 @@ config ARCH_DEFCONFIG
 	default "arch/x86/configs/i386_defconfig" if X86_32
 	default "arch/x86/configs/x86_64_defconfig" if X86_64
 
-config GENERIC_TIME
-	def_bool y
-
 config GENERIC_CMOS_UPDATE
 	def_bool y
 
@@ -2043,7 +2041,7 @@ config SCx200
 
 config SCx200HR_TIMER
 	tristate "NatSemi SCx200 27MHz High-Resolution Timer Support"
-	depends on SCx200 && GENERIC_TIME
+	depends on SCx200
 	default y
 	---help---
 	  This driver provides a clocksource built upon the on-chip
@@ -2059,6 +2057,15 @@ config OLPC
 	  Add support for detecting the unique features of the OLPC
 	  XO hardware.
 
+config OLPC_OPENFIRMWARE
+	bool "Support for OLPC's Open Firmware"
+	depends on !X86_64 && !X86_PAE
+	default y if OLPC
+	help
+	  This option adds support for the implementation of Open Firmware
+	  that is used on the OLPC XO-1 Children's Machine.
+	  If unsure, say N here.
+
 endif # X86_32
 
 config K8_NB
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index ec749c2bfdd7..f7cb086b4add 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -26,10 +26,10 @@ targets		:= vmlinux.bin setup.bin setup.elf bzImage
 targets		+= fdimage fdimage144 fdimage288 image.iso mtools.conf
 subdir-		:= compressed
 
-setup-y		+= a20.o bioscall.o cmdline.o copy.o cpu.o cpucheck.o edd.o
-setup-y		+= header.o main.o mca.o memory.o pm.o pmjump.o
-setup-y		+= printf.o regs.o string.o tty.o video.o video-mode.o
-setup-y		+= version.o
+setup-y		+= a20.o bioscall.o cmdline.o copy.o cpu.o cpucheck.o
+setup-y		+= early_serial_console.o edd.o header.o main.o mca.o memory.o
+setup-y		+= pm.o pmjump.o printf.o regs.o string.o tty.o video.o
+setup-y		+= video-mode.o version.o
 setup-$(CONFIG_X86_APM_BOOT) += apm.o
 
 # The link order of the video-*.o modules can matter.  In particular,
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 98239d2658f2..c7093bd9f2d3 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -28,6 +28,7 @@
 #include "bitops.h"
 #include <asm/cpufeature.h>
 #include <asm/processor-flags.h>
+#include "ctype.h"
 
 /* Useful macros */
 #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
@@ -37,6 +38,8 @@
 extern struct setup_header hdr;
 extern struct boot_params boot_params;
 
+#define cpu_relax()	asm volatile("rep; nop")
+
 /* Basic port I/O */
 static inline void outb(u8 v, u16 port)
 {
@@ -198,11 +201,6 @@ static inline int memcmp_gs(const void *s1, addr_t s2, size_t len)
 	return diff;
 }
 
-static inline int isdigit(int ch)
-{
-	return (ch >= '0') && (ch <= '9');
-}
-
 /* Heap -- available for dynamic lists. */
 extern char _end[];
 extern char *HEAP;
@@ -287,8 +285,18 @@ struct biosregs {
 void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg);
 
 /* cmdline.c */
-int cmdline_find_option(const char *option, char *buffer, int bufsize);
-int cmdline_find_option_bool(const char *option);
+int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int bufsize);
+int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option);
+static inline int cmdline_find_option(const char *option, char *buffer, int bufsize)
+{
+	return __cmdline_find_option(boot_params.hdr.cmd_line_ptr, option, buffer, bufsize);
+}
+
+static inline int cmdline_find_option_bool(const char *option)
+{
+	return __cmdline_find_option_bool(boot_params.hdr.cmd_line_ptr, option);
+}
+
 /* cpu.c, cpucheck.c */
 struct cpu_features {
@@ -300,6 +308,10 @@ extern struct cpu_features cpu;
 int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr);
 int validate_cpu(void);
 
+/* early_serial_console.c */
+extern int early_serial_base;
+void console_init(void);
+
 /* edd.c */
 void query_edd(void);
 
@@ -329,8 +341,10 @@ void initregs(struct biosregs *regs);
 
 /* string.c */
 int strcmp(const char *str1, const char *str2);
+int strncmp(const char *cs, const char *ct, size_t count);
 size_t strnlen(const char *s, size_t maxlen);
 unsigned int atou(const char *s);
+unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base);
 
 /* tty.c */
 void puts(const char *);
diff --git a/arch/x86/boot/cmdline.c b/arch/x86/boot/cmdline.c
index a1d35634bce0..6b3b6f708c04 100644
--- a/arch/x86/boot/cmdline.c
+++ b/arch/x86/boot/cmdline.c
@@ -27,9 +27,8 @@ static inline int myisspace(u8 c)
  * Returns the length of the argument (regardless of if it was
  * truncated to fit in the buffer), or -1 on not found.
  */
-int cmdline_find_option(const char *option, char *buffer, int bufsize)
+int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int bufsize)
 {
-	u32 cmdline_ptr = boot_params.hdr.cmd_line_ptr;
 	addr_t cptr;
 	char c;
 	int len = -1;
@@ -100,9 +99,8 @@ int cmdline_find_option(const char *option, char *buffer, int bufsize)
  * Returns the position of that option (starts counting with 1)
  * or 0 on not found
  */
-int cmdline_find_option_bool(const char *option)
+int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option)
 {
-	u32 cmdline_ptr = boot_params.hdr.cmd_line_ptr;
 	addr_t cptr;
 	char c;
 	int pos = 0, wstart = 0;
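The point of the refactor above is that the parser no longer reaches into boot_params itself: __cmdline_find_option() takes cmd_line_ptr as an argument, so the identical source file can be compiled both into the real-mode setup stub and, below, into the decompressor. As a minimal userspace sketch of this style of option matching (find_option() and the harness are illustrative names, not kernel code, and only the option=value form is handled):

#include <stdio.h>

/* Scan a space-separated command line for "option=value"; copy the
 * value into buf (truncating to bufsize-1) and return its length,
 * or -1 when the option is absent. Assumes bufsize >= 1. */
static int find_option(const char *cmdline, const char *option,
		       char *buf, int bufsize)
{
	const char *p = cmdline;

	while (*p) {
		const char *o = option;

		while (*p == ' ')		/* skip word separators */
			p++;
		while (*o && *p == *o)		/* match the option name */
			p++, o++;
		if (!*o && *p == '=') {		/* full match: copy value */
			int len = 0;

			p++;
			while (*p && *p != ' ') {
				if (len < bufsize - 1)
					buf[len] = *p;
				len++;
				p++;
			}
			buf[len < bufsize ? len : bufsize - 1] = '\0';
			return len;
		}
		while (*p && *p != ' ')		/* no match: skip the word */
			p++;
	}
	return -1;
}

int main(void)
{
	char val[32];

	if (find_option("ro quiet earlyprintk=serial,ttyS0,115200",
			"earlyprintk", val, sizeof val) > 0)
		printf("earlyprintk=%s\n", val);
	return 0;
}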
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index fbb47daf2459..0c229551eead 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -4,7 +4,7 @@
 # create a compressed vmlinux image from the original vmlinux
 #
 
-targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.lzo head_$(BITS).o misc.o piggy.o
+targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.lzo head_$(BITS).o misc.o string.o cmdline.o early_serial_console.o piggy.o
 
 KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
 KBUILD_CFLAGS += -fno-strict-aliasing -fPIC
@@ -23,7 +23,7 @@ LDFLAGS_vmlinux := -T
 
 hostprogs-y	:= mkpiggy
 
-$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/piggy.o FORCE
+$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o $(obj)/piggy.o FORCE
 	$(call if_changed,ld)
 	@:
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
new file mode 100644
index 000000000000..cb62f786990d
--- /dev/null
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -0,0 +1,21 @@
+#include "misc.h"
+
+static unsigned long fs;
+static inline void set_fs(unsigned long seg)
+{
+	fs = seg << 4;  /* shift it back */
+}
+typedef unsigned long addr_t;
+static inline char rdfs8(addr_t addr)
+{
+	return *((char *)(fs + addr));
+}
+#include "../cmdline.c"
+int cmdline_find_option(const char *option, char *buffer, int bufsize)
+{
+	return __cmdline_find_option(real_mode->hdr.cmd_line_ptr, option, buffer, bufsize);
+}
+int cmdline_find_option_bool(const char *option)
+{
+	return __cmdline_find_option_bool(real_mode->hdr.cmd_line_ptr, option);
+}
diff --git a/arch/x86/boot/compressed/early_serial_console.c b/arch/x86/boot/compressed/early_serial_console.c
new file mode 100644
index 000000000000..261e81fb9582
--- /dev/null
+++ b/arch/x86/boot/compressed/early_serial_console.c
@@ -0,0 +1,5 @@
+#include "misc.h"
+
+int early_serial_base;
+
+#include "../early_serial_console.c"
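compressed/cmdline.c above reuses the real-mode parser unchanged: it provides its own set_fs()/rdfs8() so that what the 16-bit code performs as a segmented fs: access becomes a plain flat-memory read, using the real-mode rule that the linear address is (segment << 4) + offset. A self-contained sketch of just that address arithmetic, assuming a hosted C environment (the names mirror the shim but are illustrative):

#include <stdint.h>
#include <stdio.h>

static uintptr_t fs_base;

static void set_fs(uintptr_t seg)
{
	fs_base = seg << 4;	/* real-mode paragraph to linear address */
}

static uint8_t rdfs8(uintptr_t off)
{
	/* In flat mode, "fs:off" is just an ordinary pointer read. */
	return *(const uint8_t *)(fs_base + off);
}

int main(void)
{
	static const char cmdline[] = "console=uart8250,io,0x3f8";
	uintptr_t lin = (uintptr_t)cmdline;

	/* Split a linear address into segment:offset and read it back. */
	set_fs(lin >> 4);
	printf("%c\n", rdfs8(lin & 0xf));	/* prints 'c' */
	return 0;
}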
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index f543b70ffae2..67a655a39ce4 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -124,6 +124,19 @@ relocated:
 	rep	stosl
 
 /*
+ * Adjust our own GOT
+ */
+	leal	_got(%ebx), %edx
+	leal	_egot(%ebx), %ecx
+1:
+	cmpl	%ecx, %edx
+	jae	2f
+	addl	%ebx, (%edx)
+	addl	$4, %edx
+	jmp	1b
+2:
+
+/*
  * Do the decompression, and jump to the new kernel..
  */
 	leal	z_extract_offset_negative(%ebx), %ebp
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index faff0dc9c06a..52f85a196fa0 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -280,6 +280,19 @@ relocated:
 	rep	stosq
 
 /*
+ * Adjust our own GOT
+ */
+	leaq	_got(%rip), %rdx
+	leaq	_egot(%rip), %rcx
+1:
+	cmpq	%rcx, %rdx
+	jae	2f
+	addq	%rbx, (%rdx)
+	addq	$8, %rdx
+	jmp	1b
+2:
+
+/*
  * Do the decompression, and jump to the new kernel..
  */
 	pushq	%rsi			/* Save the real mode argument */
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 51e240779a44..8f7bef8e9fff 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -9,23 +9,7 @@
  * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
  */
 
-/*
- * we have to be careful, because no indirections are allowed here, and
- * paravirt_ops is a kind of one. As it will only run in baremetal anyway,
- * we just keep it from happening
- */
-#undef CONFIG_PARAVIRT
-#ifdef CONFIG_X86_32
-#define _ASM_X86_DESC_H 1
-#endif
-
-#include <linux/linkage.h>
-#include <linux/screen_info.h>
-#include <linux/elf.h>
-#include <linux/io.h>
-#include <asm/page.h>
-#include <asm/boot.h>
-#include <asm/bootparam.h>
+#include "misc.h"
 
 /* WARNING!!
  * This code is compiled with -fPIC and it is relocated dynamically
@@ -123,15 +107,13 @@ static void error(char *m);
 /*
  * This is set up by the setup-routine at boot-time
  */
-static struct boot_params *real_mode;		/* Pointer to real-mode data */
+struct boot_params *real_mode;		/* Pointer to real-mode data */
 static int quiet;
+static int debug;
 
 void *memset(void *s, int c, size_t n);
 void *memcpy(void *dest, const void *src, size_t n);
 
-static void __putstr(int, const char *);
-#define putstr(__x)  __putstr(0, __x)
-
 #ifdef CONFIG_X86_64
 #define memptr long
 #else
@@ -170,7 +152,21 @@ static void scroll(void)
 		vidmem[i] = ' ';
 }
 
-static void __putstr(int error, const char *s)
+#define XMTRDY          0x20
+
+#define TXR             0       /*  Transmit register (WRITE) */
+#define LSR             5       /*  Line Status               */
+static void serial_putchar(int ch)
+{
+	unsigned timeout = 0xffff;
+
+	while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout)
+		cpu_relax();
+
+	outb(ch, early_serial_base + TXR);
+}
+
+void __putstr(int error, const char *s)
 {
 	int x, y, pos;
 	char c;
@@ -179,6 +175,14 @@ static void __putstr(int error, const char *s)
 	if (!error)
 		return;
 #endif
+	if (early_serial_base) {
+		const char *str = s;
+		while (*str) {
+			if (*str == '\n')
+				serial_putchar('\r');
+			serial_putchar(*str++);
+		}
+	}
 
 	if (real_mode->screen_info.orig_video_mode == 0 &&
 	    lines == 0 && cols == 0)
@@ -305,8 +309,10 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
 {
 	real_mode = rmode;
 
-	if (real_mode->hdr.loadflags & QUIET_FLAG)
+	if (cmdline_find_option_bool("quiet"))
 		quiet = 1;
+	if (cmdline_find_option_bool("debug"))
+		debug = 1;
 
 	if (real_mode->screen_info.orig_video_mode == 7) {
 		vidmem = (char *) 0xb0000;
@@ -319,6 +325,10 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
 	lines = real_mode->screen_info.orig_video_lines;
 	cols = real_mode->screen_info.orig_video_cols;
 
+	console_init();
+	if (debug)
+		putstr("early console in decompress_kernel\n");
+
 	free_mem_ptr     = heap;	/* Heap */
 	free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
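The new loops in head_32.S and head_64.S exist because the decompressor is built with -fPIC but nothing relocates it at run time: every entry of its own GOT must have the difference between the load address and the link address (the delta the assembly keeps in %ebx, or applies via %rbx) added by hand. Roughly the same operation in C, with an ordinary array standing in for the _got.._egot range that the vmlinux.lds.S hunk below defines:

#include <stdio.h>

/* Simulated GOT; in the real code the bounds come from the
 * _got/_egot linker-script symbols, not from an array. */
static unsigned long got[3] = { 0x100, 0x200, 0x300 };

static void adjust_got(unsigned long *p, unsigned long *end,
		       unsigned long delta)
{
	for (; p < end; p++)
		*p += delta;	/* relocate one GOT slot */
}

int main(void)
{
	/* delta = load address - link address */
	adjust_got(got, got + 3, 0x1000000);
	printf("%lx %lx %lx\n", got[0], got[1], got[2]);
	return 0;
}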
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
new file mode 100644
index 000000000000..3f19c81a6203
--- /dev/null
+++ b/arch/x86/boot/compressed/misc.h
@@ -0,0 +1,39 @@
+#ifndef BOOT_COMPRESSED_MISC_H
+#define BOOT_COMPRESSED_MISC_H
+
+/*
+ * we have to be careful, because no indirections are allowed here, and
+ * paravirt_ops is a kind of one. As it will only run in baremetal anyway,
+ * we just keep it from happening
+ */
+#undef CONFIG_PARAVIRT
+#ifdef CONFIG_X86_32
+#define _ASM_X86_DESC_H 1
+#endif
+
+#include <linux/linkage.h>
+#include <linux/screen_info.h>
+#include <linux/elf.h>
+#include <linux/io.h>
+#include <asm/page.h>
+#include <asm/boot.h>
+#include <asm/bootparam.h>
+
+#define BOOT_BOOT_H
+#include "../ctype.h"
+
+/* misc.c */
+extern struct boot_params *real_mode;		/* Pointer to real-mode data */
+void __putstr(int error, const char *s);
+#define putstr(__x)  __putstr(0, __x)
+#define puts(__x)  __putstr(0, __x)
+
+/* cmdline.c */
+int cmdline_find_option(const char *option, char *buffer, int bufsize);
+int cmdline_find_option_bool(const char *option);
+
+/* early_serial_console.c */
+extern int early_serial_base;
+void console_init(void);
+
+#endif
diff --git a/arch/x86/boot/compressed/string.c b/arch/x86/boot/compressed/string.c
new file mode 100644
index 000000000000..19b3e693cd72
--- /dev/null
+++ b/arch/x86/boot/compressed/string.c
@@ -0,0 +1,2 @@
+#include "misc.h"
+#include "../string.c"
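misc.h now centralizes the declarations shared by the decompressor, which is what lets string.c, cmdline.c and early_serial_console.c be one-line wrappers that #include the real-mode sources. The output path they all feed is the polled transmit added to misc.c above: spin on the line-status register until the transmitter is ready, bounded by a timeout so a dead port cannot hang the boot, then write the byte. A userspace model with stubbed port I/O (this inb()/outb() pair just fakes a ready UART):

#include <stdio.h>

#define TXR	0		/* Transmit register (WRITE) */
#define LSR	5		/* Line Status */
#define XMTRDY	0x20		/* transmitter ready for a byte */

/* Stand-ins so the sketch runs in userspace. */
static unsigned char inb(int port)  { (void)port; return XMTRDY; }
static void outb(int ch, int port)  { (void)port; putchar(ch); }

/* Same shape as the boot code's serial_putchar(). */
static void serial_putchar(int base, int ch)
{
	unsigned timeout = 0xffff;

	while ((inb(base + LSR) & XMTRDY) == 0 && --timeout)
		;			/* cpu_relax() in the kernel */

	outb(ch, base + TXR);
}

int main(void)
{
	const char *s = "early serial\n";

	while (*s) {
		if (*s == '\n')
			serial_putchar(0x3f8, '\r');	/* \n -> \r\n */
		serial_putchar(0x3f8, *s++);
	}
	return 0;
}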
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
index 5ddabceee124..34d047c98284 100644
--- a/arch/x86/boot/compressed/vmlinux.lds.S
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -41,6 +41,12 @@ SECTIONS
 		*(.rodata.*)
 		_erodata = . ;
 	}
+	.got : {
+		_got = .;
+		KEEP(*(.got.plt))
+		KEEP(*(.got))
+		_egot = .;
+	}
 	.data :	{
 		_data = . ;
 		*(.data)
diff --git a/arch/x86/boot/ctype.h b/arch/x86/boot/ctype.h
new file mode 100644
index 000000000000..25e13403193c
--- /dev/null
+++ b/arch/x86/boot/ctype.h
@@ -0,0 +1,21 @@
+#ifndef BOOT_ISDIGIT_H
+
+#define BOOT_ISDIGIT_H
+
+static inline int isdigit(int ch)
+{
+	return (ch >= '0') && (ch <= '9');
+}
+
+static inline int isxdigit(int ch)
+{
+	if (isdigit(ch))
+		return true;
+
+	if ((ch >= 'a') && (ch <= 'f'))
+		return true;
+
+	return (ch >= 'A') && (ch <= 'F');
+}
+
+#endif
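isdigit() and isxdigit() move into a standalone ctype.h so both boot environments can share them; their main consumer is simple_strtoull(), added to boot/string.c further down. A compilable model of that parsing logic, with base auto-detection for 0x-prefixed hex, leading-zero octal, and decimal (parse_ull() is an illustrative name, the helpers carry a trailing underscore only to avoid libc's ctype.h, and the kernel version also hands back the end pointer):

#include <stdio.h>

static int isdigit_(int ch)  { return ch >= '0' && ch <= '9'; }
static int isxdigit_(int ch)
{
	return isdigit_(ch) || (ch >= 'a' && ch <= 'f') ||
	       (ch >= 'A' && ch <= 'F');
}

#define TOLOWER(x) ((x) | 0x20)	/* valid only for letters and digits */

static unsigned long long parse_ull(const char *cp, unsigned base)
{
	unsigned long long result = 0;

	if (!base)			/* auto-detect the base */
		base = cp[0] == '0'
			? (TOLOWER(cp[1]) == 'x' && isxdigit_(cp[2]) ? 16 : 8)
			: 10;
	if (base == 16 && cp[0] == '0' && TOLOWER(cp[1]) == 'x')
		cp += 2;

	while (isxdigit_(*cp)) {
		unsigned v = isdigit_(*cp) ? *cp - '0'
					   : TOLOWER(*cp) - 'a' + 10;
		if (v >= base)		/* digit out of range: stop */
			break;
		result = result * base + v;
		cp++;
	}
	return result;
}

int main(void)
{
	printf("%llu %llu\n", parse_ull("0x3f8", 0), parse_ull("115200", 0));
	return 0;
}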
diff --git a/arch/x86/boot/early_serial_console.c b/arch/x86/boot/early_serial_console.c
new file mode 100644
index 000000000000..030f4b93e255
--- /dev/null
+++ b/arch/x86/boot/early_serial_console.c
@@ -0,0 +1,139 @@
+#include "boot.h"
+
+#define DEFAULT_SERIAL_PORT 0x3f8 /* ttyS0 */
+
+#define XMTRDY          0x20
+
+#define DLAB		0x80
+
+#define TXR             0       /*  Transmit register (WRITE) */
+#define RXR             0       /*  Receive register  (READ)  */
+#define IER             1       /*  Interrupt Enable          */
+#define IIR             2       /*  Interrupt ID              */
+#define FCR             2       /*  FIFO control              */
+#define LCR             3       /*  Line control              */
+#define MCR             4       /*  Modem control             */
+#define LSR             5       /*  Line Status               */
+#define MSR             6       /*  Modem Status              */
+#define DLL             0       /*  Divisor Latch Low         */
+#define DLH             1       /*  Divisor latch High        */
+
+#define DEFAULT_BAUD 9600
+
+static void early_serial_init(int port, int baud)
+{
+	unsigned char c;
+	unsigned divisor;
+
+	outb(0x3, port + LCR);	/* 8n1 */
+	outb(0, port + IER);	/* no interrupt */
+	outb(0, port + FCR);	/* no fifo */
+	outb(0x3, port + MCR);	/* DTR + RTS */
+
+	divisor	= 115200 / baud;
+	c = inb(port + LCR);
+	outb(c | DLAB, port + LCR);
+	outb(divisor & 0xff, port + DLL);
+	outb((divisor >> 8) & 0xff, port + DLH);
+	outb(c & ~DLAB, port + LCR);
+
+	early_serial_base = port;
+}
+
+static void parse_earlyprintk(void)
+{
+	int baud = DEFAULT_BAUD;
+	char arg[32];
+	int pos = 0;
+	int port = 0;
+
+	if (cmdline_find_option("earlyprintk", arg, sizeof arg) > 0) {
+		char *e;
+
+		if (!strncmp(arg, "serial", 6)) {
+			port = DEFAULT_SERIAL_PORT;
+			pos += 6;
+		}
+
+		if (arg[pos] == ',')
+			pos++;
+
+		if (!strncmp(arg, "ttyS", 4)) {
+			static const int bases[] = { 0x3f8, 0x2f8 };
+			int idx = 0;
+
+			if (!strncmp(arg + pos, "ttyS", 4))
+				pos += 4;
+
+			if (arg[pos++] == '1')
+				idx = 1;
+
+			port = bases[idx];
+		}
+
+		if (arg[pos] == ',')
+			pos++;
+
+		baud = simple_strtoull(arg + pos, &e, 0);
+		if (baud == 0 || arg + pos == e)
+			baud = DEFAULT_BAUD;
+	}
+
+	if (port)
+		early_serial_init(port, baud);
+}
+
+#define BASE_BAUD (1843200/16)
+static unsigned int probe_baud(int port)
+{
+	unsigned char lcr, dll, dlh;
+	unsigned int quot;
+
+	lcr = inb(port + LCR);
+	outb(lcr | DLAB, port + LCR);
+	dll = inb(port + DLL);
+	dlh = inb(port + DLH);
+	outb(lcr, port + LCR);
+	quot = (dlh << 8) | dll;
+
+	return BASE_BAUD / quot;
+}
+
+static void parse_console_uart8250(void)
+{
+	char optstr[64], *options;
+	int baud = DEFAULT_BAUD;
+	int port = 0;
+
+	/*
+	 * console=uart8250,io,0x3f8,115200n8
+	 * need to make sure it is last one console !
+	 */
+	if (cmdline_find_option("console", optstr, sizeof optstr) <= 0)
+		return;
+
+	options = optstr;
+
+	if (!strncmp(options, "uart8250,io,", 12))
+		port = simple_strtoull(options + 12, &options, 0);
+	else if (!strncmp(options, "uart,io,", 8))
+		port = simple_strtoull(options + 8, &options, 0);
+	else
+		return;
+
+	if (options && (options[0] == ','))
+		baud = simple_strtoull(options + 1, &options, 0);
+	else
+		baud = probe_baud(port);
+
+	if (port)
+		early_serial_init(port, baud);
+}
+
+void console_init(void)
+{
+	parse_earlyprintk();
+
+	if (!early_serial_base)
+		parse_console_uart8250();
+}
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
index 140172b895bd..40358c8905be 100644
--- a/arch/x86/boot/main.c
+++ b/arch/x86/boot/main.c
@@ -130,6 +130,11 @@ void main(void)
 	/* First, copy the boot header into the "zeropage" */
 	copy_boot_params();
 
+	/* Initialize the early-boot console */
+	console_init();
+	if (cmdline_find_option_bool("debug"))
+		puts("early console in setup code\n");
+
 	/* End of heap check */
 	init_heap();
 
@@ -168,10 +173,6 @@ void main(void)
 	/* Set the video mode */
 	set_video();
 
-	/* Parse command line for 'quiet' and pass it to decompressor. */
-	if (cmdline_find_option_bool("quiet"))
-		boot_params.hdr.loadflags |= QUIET_FLAG;
-
 	/* Do the last things and invoke protected mode */
 	go_to_protected_mode();
 }
diff --git a/arch/x86/boot/printf.c b/arch/x86/boot/printf.c
index 50e47cdbdddd..cdac91ca55d3 100644
--- a/arch/x86/boot/printf.c
+++ b/arch/x86/boot/printf.c
@@ -34,7 +34,7 @@ static int skip_atoi(const char **s)
 #define SMALL	32		/* Must be 32 == 0x20 */
 #define SPECIAL	64		/* 0x */
 
-#define do_div(n,base) ({ \
+#define __do_div(n, base) ({ \
 int __res; \
 __res = ((unsigned long) n) % (unsigned) base; \
 n = ((unsigned long) n) / (unsigned) base; \
@@ -83,7 +83,7 @@ static char *number(char *str, long num, int base, int size, int precision,
 		tmp[i++] = '0';
 	else
 		while (num != 0)
-			tmp[i++] = (digits[do_div(num, base)] | locase);
+			tmp[i++] = (digits[__do_div(num, base)] | locase);
 	if (i > precision)
 		precision = i;
 	size -= precision;
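For reference, the divisor arithmetic used by early_serial_init() and probe_baud() above: the 8250's baud-rate generator divides a 115200 Hz reference clock (BASE_BAUD = 1843200/16) by the 16-bit divisor programmed through DLL/DLH while the DLAB bit is set in LCR, so writing a divisor and probing one back are exact inverses (integer truncation aside). A standalone check of that round trip:

#include <stdio.h>

#define BASE_BAUD (1843200 / 16)

int main(void)
{
	unsigned baud = 9600;
	unsigned divisor = BASE_BAUD / baud;	/* what early_serial_init programs */

	/* Low/high halves as they would go into DLL and DLH. */
	printf("DLL=0x%02x DLH=0x%02x\n", divisor & 0xff, (divisor >> 8) & 0xff);
	/* probe_baud() reads the halves back and inverts the division. */
	printf("probe_baud would report %u\n", BASE_BAUD / divisor);
	return 0;
}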
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index f94b7a0c2abf..3cbc4058dd26 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -30,6 +30,22 @@ int strcmp(const char *str1, const char *str2)
 	return 0;
 }
 
+int strncmp(const char *cs, const char *ct, size_t count)
+{
+	unsigned char c1, c2;
+
+	while (count) {
+		c1 = *cs++;
+		c2 = *ct++;
+		if (c1 != c2)
+			return c1 < c2 ? -1 : 1;
+		if (!c1)
+			break;
+		count--;
+	}
+	return 0;
+}
+
 size_t strnlen(const char *s, size_t maxlen)
 {
 	const char *es = s;
@@ -48,3 +64,50 @@ unsigned int atou(const char *s)
 		i = i * 10 + (*s++ - '0');
 	return i;
 }
+
+/* Works only for digits and letters, but small and fast */
+#define TOLOWER(x) ((x) | 0x20)
+
+static unsigned int simple_guess_base(const char *cp)
+{
+	if (cp[0] == '0') {
+		if (TOLOWER(cp[1]) == 'x' && isxdigit(cp[2]))
+			return 16;
+		else
+			return 8;
+	} else {
+		return 10;
+	}
+}
+
+/**
+ * simple_strtoull - convert a string to an unsigned long long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+
+unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base)
+{
+	unsigned long long result = 0;
+
+	if (!base)
+		base = simple_guess_base(cp);
+
+	if (base == 16 && cp[0] == '0' && TOLOWER(cp[1]) == 'x')
+		cp += 2;
+
+	while (isxdigit(*cp)) {
+		unsigned int value;
+
+		value = isdigit(*cp) ? *cp - '0' : TOLOWER(*cp) - 'a' + 10;
+		if (value >= base)
+			break;
+		result = result * base + value;
+		cp++;
+	}
+	if (endp)
+		*endp = (char *)cp;
+
+	return result;
+}
diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c
index 01ec69c901c7..def2451f46ae 100644
--- a/arch/x86/boot/tty.c
+++ b/arch/x86/boot/tty.c
@@ -10,23 +10,36 @@
  * ----------------------------------------------------------------------- */
 
 /*
- * Very simple screen I/O
- * XXX: Probably should add very simple serial I/O?
+ * Very simple screen and serial I/O
  */
 
 #include "boot.h"
 
+int early_serial_base;
+
+#define XMTRDY          0x20
+
+#define TXR             0       /*  Transmit register (WRITE) */
+#define LSR             5       /*  Line Status               */
+
 /*
  * These functions are in .inittext so they can be used to signal
  * error during initialization.
  */
-void __attribute__((section(".inittext"))) putchar(int ch)
+static void __attribute__((section(".inittext"))) serial_putchar(int ch)
 {
-	struct biosregs ireg;
+	unsigned timeout = 0xffff;
 
-	if (ch == '\n')
-		putchar('\r');	/* \n -> \r\n */
+	while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout)
+		cpu_relax();
+
+	outb(ch, early_serial_base + TXR);
+}
+
+static void __attribute__((section(".inittext"))) bios_putchar(int ch)
+{
+	struct biosregs ireg;
 
 	initregs(&ireg);
 	ireg.bx = 0x0007;
@@ -36,6 +49,17 @@ void __attribute__((section(".inittext"))) putchar(int ch)
 	intcall(0x10, &ireg, NULL);
 }
 
+void __attribute__((section(".inittext"))) putchar(int ch)
+{
+	if (ch == '\n')
+		putchar('\r');	/* \n -> \r\n */
+
+	bios_putchar(ch);
+
+	if (early_serial_base != 0)
+		serial_putchar(ch);
+}
+
 void __attribute__((section(".inittext"))) puts(const char *str)
 {
 	while (*str)
@@ -112,3 +136,4 @@ int getchar_timeout(void)
 	return 0;		/* Timeout!
*/  } + diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index d28fad19654a..6f9872658dd2 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -1,524 +1,84 @@ -# -# Automatically generated make config: don't edit -# Linux kernel version: 2.6.30-rc2 -# Mon May 11 16:21:55 2009 -# -# CONFIG_64BIT is not set -CONFIG_X86_32=y -# CONFIG_X86_64 is not set -CONFIG_X86=y -CONFIG_OUTPUT_FORMAT="elf32-i386" -CONFIG_ARCH_DEFCONFIG="arch/x86/configs/i386_defconfig" -CONFIG_GENERIC_TIME=y -CONFIG_GENERIC_CMOS_UPDATE=y -CONFIG_CLOCKSOURCE_WATCHDOG=y -CONFIG_GENERIC_CLOCKEVENTS=y -CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y -CONFIG_LOCKDEP_SUPPORT=y -CONFIG_STACKTRACE_SUPPORT=y -CONFIG_HAVE_LATENCYTOP_SUPPORT=y -CONFIG_FAST_CMPXCHG_LOCAL=y -CONFIG_MMU=y -CONFIG_ZONE_DMA=y -CONFIG_GENERIC_ISA_DMA=y -CONFIG_GENERIC_IOMAP=y -CONFIG_GENERIC_BUG=y -CONFIG_GENERIC_HWEIGHT=y -CONFIG_ARCH_MAY_HAVE_PC_FDC=y -# CONFIG_RWSEM_GENERIC_SPINLOCK is not set -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y -CONFIG_GENERIC_CALIBRATE_DELAY=y -# CONFIG_GENERIC_TIME_VSYSCALL is not set -CONFIG_ARCH_HAS_CPU_RELAX=y -CONFIG_ARCH_HAS_DEFAULT_IDLE=y -CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y -CONFIG_HAVE_SETUP_PER_CPU_AREA=y -CONFIG_HAVE_DYNAMIC_PER_CPU_AREA=y -# CONFIG_HAVE_CPUMASK_OF_CPU_MAP is not set -CONFIG_ARCH_HIBERNATION_POSSIBLE=y -CONFIG_ARCH_SUSPEND_POSSIBLE=y -# CONFIG_ZONE_DMA32 is not set -CONFIG_ARCH_POPULATES_NODE_MAP=y -# CONFIG_AUDIT_ARCH is not set -CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y -CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y -CONFIG_GENERIC_HARDIRQS=y -CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y -CONFIG_GENERIC_IRQ_PROBE=y -CONFIG_GENERIC_PENDING_IRQ=y -CONFIG_USE_GENERIC_SMP_HELPERS=y -CONFIG_X86_32_SMP=y -CONFIG_X86_HT=y -CONFIG_X86_TRAMPOLINE=y -CONFIG_X86_32_LAZY_GS=y -CONFIG_KTIME_SCALAR=y -CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" - -# -# General setup -#  CONFIG_EXPERIMENTAL=y -CONFIG_LOCK_KERNEL=y -CONFIG_INIT_ENV_ARG_LIMIT=32 -CONFIG_LOCALVERSION=""  # CONFIG_LOCALVERSION_AUTO is not set -CONFIG_HAVE_KERNEL_GZIP=y -CONFIG_HAVE_KERNEL_BZIP2=y -CONFIG_HAVE_KERNEL_LZMA=y -CONFIG_KERNEL_GZIP=y -# CONFIG_KERNEL_BZIP2 is not set -# CONFIG_KERNEL_LZMA is not set -CONFIG_SWAP=y  CONFIG_SYSVIPC=y -CONFIG_SYSVIPC_SYSCTL=y  CONFIG_POSIX_MQUEUE=y -CONFIG_POSIX_MQUEUE_SYSCTL=y  CONFIG_BSD_PROCESS_ACCT=y -# CONFIG_BSD_PROCESS_ACCT_V3 is not set  CONFIG_TASKSTATS=y  CONFIG_TASK_DELAY_ACCT=y  CONFIG_TASK_XACCT=y  CONFIG_TASK_IO_ACCOUNTING=y  CONFIG_AUDIT=y -CONFIG_AUDITSYSCALL=y -CONFIG_AUDIT_TREE=y - -# -# RCU Subsystem -# -# CONFIG_CLASSIC_RCU is not set -CONFIG_TREE_RCU=y -# CONFIG_PREEMPT_RCU is not set -# CONFIG_RCU_TRACE is not set -CONFIG_RCU_FANOUT=32 -# CONFIG_RCU_FANOUT_EXACT is not set -# CONFIG_TREE_RCU_TRACE is not set -# CONFIG_PREEMPT_RCU_TRACE is not set -# CONFIG_IKCONFIG is not set  CONFIG_LOG_BUF_SHIFT=18 -CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y -CONFIG_GROUP_SCHED=y -CONFIG_FAIR_GROUP_SCHED=y -# CONFIG_RT_GROUP_SCHED is not set -# CONFIG_USER_SCHED is not set -CONFIG_CGROUP_SCHED=y  CONFIG_CGROUPS=y -# CONFIG_CGROUP_DEBUG is not set  CONFIG_CGROUP_NS=y  CONFIG_CGROUP_FREEZER=y -# CONFIG_CGROUP_DEVICE is not set  CONFIG_CPUSETS=y -CONFIG_PROC_PID_CPUSET=y  CONFIG_CGROUP_CPUACCT=y  CONFIG_RESOURCE_COUNTERS=y -# CONFIG_CGROUP_MEM_RES_CTLR is not set -# CONFIG_SYSFS_DEPRECATED_V2 is not set -CONFIG_RELAY=y -CONFIG_NAMESPACES=y +CONFIG_CGROUP_SCHED=y  CONFIG_UTS_NS=y  CONFIG_IPC_NS=y  CONFIG_USER_NS=y  CONFIG_PID_NS=y  
CONFIG_NET_NS=y  CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" -CONFIG_RD_GZIP=y -CONFIG_RD_BZIP2=y -CONFIG_RD_LZMA=y -CONFIG_CC_OPTIMIZE_FOR_SIZE=y -CONFIG_SYSCTL=y -CONFIG_ANON_INODES=y -# CONFIG_EMBEDDED is not set -CONFIG_UID16=y -CONFIG_SYSCTL_SYSCALL=y -CONFIG_KALLSYMS=y -CONFIG_KALLSYMS_ALL=y  CONFIG_KALLSYMS_EXTRA_PASS=y -# CONFIG_STRIP_ASM_SYMS is not set -CONFIG_HOTPLUG=y -CONFIG_PRINTK=y -CONFIG_BUG=y -CONFIG_ELF_CORE=y -CONFIG_PCSPKR_PLATFORM=y -CONFIG_BASE_FULL=y -CONFIG_FUTEX=y -CONFIG_EPOLL=y -CONFIG_SIGNALFD=y -CONFIG_TIMERFD=y -CONFIG_EVENTFD=y -CONFIG_SHMEM=y -CONFIG_AIO=y -CONFIG_VM_EVENT_COUNTERS=y -CONFIG_PCI_QUIRKS=y -CONFIG_SLUB_DEBUG=y  # CONFIG_COMPAT_BRK is not set -# CONFIG_SLAB is not set -CONFIG_SLUB=y -# CONFIG_SLOB is not set  CONFIG_PROFILING=y -CONFIG_TRACEPOINTS=y -CONFIG_MARKERS=y -# CONFIG_OPROFILE is not set -CONFIG_HAVE_OPROFILE=y  CONFIG_KPROBES=y -CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y -CONFIG_KRETPROBES=y -CONFIG_HAVE_IOREMAP_PROT=y -CONFIG_HAVE_KPROBES=y -CONFIG_HAVE_KRETPROBES=y -CONFIG_HAVE_ARCH_TRACEHOOK=y -CONFIG_HAVE_DMA_API_DEBUG=y -# CONFIG_SLOW_WORK is not set -CONFIG_HAVE_GENERIC_DMA_COHERENT=y -CONFIG_SLABINFO=y -CONFIG_RT_MUTEXES=y -CONFIG_BASE_SMALL=0  CONFIG_MODULES=y -# CONFIG_MODULE_FORCE_LOAD is not set  CONFIG_MODULE_UNLOAD=y  CONFIG_MODULE_FORCE_UNLOAD=y -# CONFIG_MODVERSIONS is not set -# CONFIG_MODULE_SRCVERSION_ALL is not set -CONFIG_STOP_MACHINE=y -CONFIG_BLOCK=y -# CONFIG_LBD is not set -CONFIG_BLK_DEV_BSG=y -# CONFIG_BLK_DEV_INTEGRITY is not set - -# -# IO Schedulers -# -CONFIG_IOSCHED_NOOP=y -CONFIG_IOSCHED_AS=y -CONFIG_IOSCHED_DEADLINE=y -CONFIG_IOSCHED_CFQ=y -# CONFIG_DEFAULT_AS is not set -# CONFIG_DEFAULT_DEADLINE is not set -CONFIG_DEFAULT_CFQ=y -# CONFIG_DEFAULT_NOOP is not set -CONFIG_DEFAULT_IOSCHED="cfq" -CONFIG_FREEZER=y - -# -# Processor type and features -# -CONFIG_TICK_ONESHOT=y  CONFIG_NO_HZ=y  CONFIG_HIGH_RES_TIMERS=y -CONFIG_GENERIC_CLOCKEVENTS_BUILD=y  CONFIG_SMP=y  CONFIG_SPARSE_IRQ=y -CONFIG_X86_MPPARSE=y -# CONFIG_X86_BIGSMP is not set -CONFIG_X86_EXTENDED_PLATFORM=y -# CONFIG_X86_ELAN is not set -# CONFIG_X86_RDC321X is not set -# CONFIG_X86_32_NON_STANDARD is not set -CONFIG_SCHED_OMIT_FRAME_POINTER=y -# CONFIG_PARAVIRT_GUEST is not set -# CONFIG_MEMTEST is not set -# CONFIG_M386 is not set -# CONFIG_M486 is not set -# CONFIG_M586 is not set -# CONFIG_M586TSC is not set -# CONFIG_M586MMX is not set -CONFIG_M686=y -# CONFIG_MPENTIUMII is not set -# CONFIG_MPENTIUMIII is not set -# CONFIG_MPENTIUMM is not set -# CONFIG_MPENTIUM4 is not set -# CONFIG_MK6 is not set -# CONFIG_MK7 is not set -# CONFIG_MK8 is not set -# CONFIG_MCRUSOE is not set -# CONFIG_MEFFICEON is not set -# CONFIG_MWINCHIPC6 is not set -# CONFIG_MWINCHIP3D is not set -# CONFIG_MGEODEGX1 is not set -# CONFIG_MGEODE_LX is not set -# CONFIG_MCYRIXIII is not set -# CONFIG_MVIAC3_2 is not set -# CONFIG_MVIAC7 is not set -# CONFIG_MPSC is not set -# CONFIG_MCORE2 is not set -# CONFIG_GENERIC_CPU is not set  CONFIG_X86_GENERIC=y -CONFIG_X86_CPU=y -CONFIG_X86_L1_CACHE_BYTES=64 -CONFIG_X86_INTERNODE_CACHE_BYTES=64 -CONFIG_X86_CMPXCHG=y -CONFIG_X86_L1_CACHE_SHIFT=5 -CONFIG_X86_XADD=y -# CONFIG_X86_PPRO_FENCE is not set -CONFIG_X86_WP_WORKS_OK=y -CONFIG_X86_INVLPG=y -CONFIG_X86_BSWAP=y -CONFIG_X86_POPAD_OK=y -CONFIG_X86_INTEL_USERCOPY=y -CONFIG_X86_USE_PPRO_CHECKSUM=y -CONFIG_X86_TSC=y -CONFIG_X86_CMOV=y -CONFIG_X86_MINIMUM_CPU_FAMILY=4 -CONFIG_X86_DEBUGCTLMSR=y -CONFIG_CPU_SUP_INTEL=y -CONFIG_CPU_SUP_CYRIX_32=y -CONFIG_CPU_SUP_AMD=y 
-CONFIG_CPU_SUP_CENTAUR=y -CONFIG_CPU_SUP_TRANSMETA_32=y -CONFIG_CPU_SUP_UMC_32=y -CONFIG_X86_DS=y -CONFIG_X86_PTRACE_BTS=y  CONFIG_HPET_TIMER=y -CONFIG_HPET_EMULATE_RTC=y -CONFIG_DMI=y -# CONFIG_IOMMU_HELPER is not set -# CONFIG_IOMMU_API is not set -CONFIG_NR_CPUS=64  CONFIG_SCHED_SMT=y -CONFIG_SCHED_MC=y -# CONFIG_PREEMPT_NONE is not set  CONFIG_PREEMPT_VOLUNTARY=y -# CONFIG_PREEMPT is not set -CONFIG_X86_LOCAL_APIC=y -CONFIG_X86_IO_APIC=y  CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y  CONFIG_X86_MCE=y -CONFIG_X86_MCE_NONFATAL=y -CONFIG_X86_MCE_P4THERMAL=y -CONFIG_VM86=y -# CONFIG_TOSHIBA is not set -# CONFIG_I8K is not set  CONFIG_X86_REBOOTFIXUPS=y  CONFIG_MICROCODE=y -CONFIG_MICROCODE_INTEL=y  CONFIG_MICROCODE_AMD=y -CONFIG_MICROCODE_OLD_INTERFACE=y  CONFIG_X86_MSR=y  CONFIG_X86_CPUID=y -# CONFIG_X86_CPU_DEBUG is not set -# CONFIG_NOHIGHMEM is not set -CONFIG_HIGHMEM4G=y -# CONFIG_HIGHMEM64G is not set -CONFIG_PAGE_OFFSET=0xC0000000 -CONFIG_HIGHMEM=y -# CONFIG_ARCH_PHYS_ADDR_T_64BIT is not set -CONFIG_ARCH_FLATMEM_ENABLE=y -CONFIG_ARCH_SPARSEMEM_ENABLE=y -CONFIG_ARCH_SELECT_MEMORY_MODEL=y -CONFIG_SELECT_MEMORY_MODEL=y -CONFIG_FLATMEM_MANUAL=y -# CONFIG_DISCONTIGMEM_MANUAL is not set -# CONFIG_SPARSEMEM_MANUAL is not set -CONFIG_FLATMEM=y -CONFIG_FLAT_NODE_MEM_MAP=y -CONFIG_SPARSEMEM_STATIC=y -CONFIG_PAGEFLAGS_EXTENDED=y -CONFIG_SPLIT_PTLOCK_CPUS=4 -# CONFIG_PHYS_ADDR_T_64BIT is not set -CONFIG_ZONE_DMA_FLAG=1 -CONFIG_BOUNCE=y -CONFIG_VIRT_TO_BUS=y -CONFIG_UNEVICTABLE_LRU=y -CONFIG_HAVE_MLOCK=y -CONFIG_HAVE_MLOCKED_PAGE_BIT=y  CONFIG_HIGHPTE=y  CONFIG_X86_CHECK_BIOS_CORRUPTION=y -CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y -CONFIG_X86_RESERVE_LOW_64K=y -# CONFIG_MATH_EMULATION is not set -CONFIG_MTRR=y  # CONFIG_MTRR_SANITIZER is not set -CONFIG_X86_PAT=y  CONFIG_EFI=y -CONFIG_SECCOMP=y -# CONFIG_CC_STACKPROTECTOR is not set -# CONFIG_HZ_100 is not set -# CONFIG_HZ_250 is not set -# CONFIG_HZ_300 is not set  CONFIG_HZ_1000=y -CONFIG_HZ=1000 -CONFIG_SCHED_HRTICK=y  CONFIG_KEXEC=y  CONFIG_CRASH_DUMP=y -# CONFIG_KEXEC_JUMP is not set -CONFIG_PHYSICAL_START=0x1000000 -CONFIG_RELOCATABLE=y -CONFIG_X86_NEED_RELOCS=y -CONFIG_PHYSICAL_ALIGN=0x1000000 -CONFIG_HOTPLUG_CPU=y  # CONFIG_COMPAT_VDSO is not set -# CONFIG_CMDLINE_BOOL is not set -CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y - -# -# Power management and ACPI options -#  CONFIG_PM=y  CONFIG_PM_DEBUG=y -# CONFIG_PM_VERBOSE is not set -CONFIG_CAN_PM_TRACE=y -CONFIG_PM_TRACE=y  CONFIG_PM_TRACE_RTC=y -CONFIG_PM_SLEEP_SMP=y -CONFIG_PM_SLEEP=y -CONFIG_SUSPEND=y -# CONFIG_PM_TEST_SUSPEND is not set -CONFIG_SUSPEND_FREEZER=y  CONFIG_HIBERNATION=y -CONFIG_PM_STD_PARTITION="" -CONFIG_ACPI=y -CONFIG_ACPI_SLEEP=y  CONFIG_ACPI_PROCFS=y -CONFIG_ACPI_PROCFS_POWER=y -CONFIG_ACPI_SYSFS_POWER=y -CONFIG_ACPI_PROC_EVENT=y -CONFIG_ACPI_AC=y -CONFIG_ACPI_BATTERY=y -CONFIG_ACPI_BUTTON=y -CONFIG_ACPI_FAN=y  CONFIG_ACPI_DOCK=y -CONFIG_ACPI_PROCESSOR=y -CONFIG_ACPI_HOTPLUG_CPU=y -CONFIG_ACPI_THERMAL=y -# CONFIG_ACPI_CUSTOM_DSDT is not set -CONFIG_ACPI_BLACKLIST_YEAR=0 -# CONFIG_ACPI_DEBUG is not set -# CONFIG_ACPI_PCI_SLOT is not set -CONFIG_X86_PM_TIMER=y -CONFIG_ACPI_CONTAINER=y -# CONFIG_ACPI_SBS is not set -# CONFIG_APM is not set - -# -# CPU Frequency scaling -#  CONFIG_CPU_FREQ=y -CONFIG_CPU_FREQ_TABLE=y  CONFIG_CPU_FREQ_DEBUG=y  # CONFIG_CPU_FREQ_STAT is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set  CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y -# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set -# 
CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set  CONFIG_CPU_FREQ_GOV_PERFORMANCE=y -# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set -CONFIG_CPU_FREQ_GOV_USERSPACE=y  CONFIG_CPU_FREQ_GOV_ONDEMAND=y -# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set - -# -# CPUFreq processor drivers -#  CONFIG_X86_ACPI_CPUFREQ=y -# CONFIG_X86_POWERNOW_K6 is not set -# CONFIG_X86_POWERNOW_K7 is not set -# CONFIG_X86_POWERNOW_K8 is not set -# CONFIG_X86_GX_SUSPMOD is not set -# CONFIG_X86_SPEEDSTEP_CENTRINO is not set -# CONFIG_X86_SPEEDSTEP_ICH is not set -# CONFIG_X86_SPEEDSTEP_SMI is not set -# CONFIG_X86_P4_CLOCKMOD is not set -# CONFIG_X86_CPUFREQ_NFORCE2 is not set -# CONFIG_X86_LONGRUN is not set -# CONFIG_X86_LONGHAUL is not set -# CONFIG_X86_E_POWERSAVER is not set - -# -# shared options -# -# CONFIG_X86_SPEEDSTEP_LIB is not set -CONFIG_CPU_IDLE=y -CONFIG_CPU_IDLE_GOV_LADDER=y -CONFIG_CPU_IDLE_GOV_MENU=y - -# -# Bus options (PCI etc.) -# -CONFIG_PCI=y -# CONFIG_PCI_GOBIOS is not set -# CONFIG_PCI_GOMMCONFIG is not set -# CONFIG_PCI_GODIRECT is not set -# CONFIG_PCI_GOOLPC is not set -CONFIG_PCI_GOANY=y -CONFIG_PCI_BIOS=y -CONFIG_PCI_DIRECT=y -CONFIG_PCI_MMCONFIG=y -CONFIG_PCI_DOMAINS=y -# CONFIG_DMAR is not set  CONFIG_PCIEPORTBUS=y -# CONFIG_HOTPLUG_PCI_PCIE is not set -CONFIG_PCIEAER=y -# CONFIG_PCIEASPM is not set -CONFIG_ARCH_SUPPORTS_MSI=y  CONFIG_PCI_MSI=y -# CONFIG_PCI_LEGACY is not set -# CONFIG_PCI_DEBUG is not set -# CONFIG_PCI_STUB is not set -CONFIG_HT_IRQ=y -# CONFIG_PCI_IOV is not set -CONFIG_ISA_DMA_API=y -# CONFIG_ISA is not set -# CONFIG_MCA is not set -# CONFIG_SCx200 is not set -# CONFIG_OLPC is not set -CONFIG_K8_NB=y  CONFIG_PCCARD=y -# CONFIG_PCMCIA_DEBUG is not set -CONFIG_PCMCIA=y -CONFIG_PCMCIA_LOAD_CIS=y -CONFIG_PCMCIA_IOCTL=y -CONFIG_CARDBUS=y - -# -# PC-card bridges -#  CONFIG_YENTA=y -CONFIG_YENTA_O2=y -CONFIG_YENTA_RICOH=y -CONFIG_YENTA_TI=y -CONFIG_YENTA_ENE_TUNE=y -CONFIG_YENTA_TOSHIBA=y -# CONFIG_PD6729 is not set -# CONFIG_I82092 is not set -CONFIG_PCCARD_NONSTATIC=y  CONFIG_HOTPLUG_PCI=y -# CONFIG_HOTPLUG_PCI_FAKE is not set -# CONFIG_HOTPLUG_PCI_IBM is not set -# CONFIG_HOTPLUG_PCI_ACPI is not set -# CONFIG_HOTPLUG_PCI_CPCI is not set -# CONFIG_HOTPLUG_PCI_SHPC is not set - -# -# Executable file formats / Emulations -# -CONFIG_BINFMT_ELF=y  CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y -CONFIG_HAVE_AOUT=y -# CONFIG_BINFMT_AOUT is not set  CONFIG_BINFMT_MISC=y -CONFIG_HAVE_ATOMIC_IOMAP=y  CONFIG_NET=y - -# -# Networking options -#  CONFIG_PACKET=y -CONFIG_PACKET_MMAP=y  CONFIG_UNIX=y -CONFIG_XFRM=y  CONFIG_XFRM_USER=y -# CONFIG_XFRM_SUB_POLICY is not set -# CONFIG_XFRM_MIGRATE is not set -# CONFIG_XFRM_STATISTICS is not set -# CONFIG_NET_KEY is not set  CONFIG_INET=y  CONFIG_IP_MULTICAST=y  CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_ASK_IP_FIB_HASH=y -# CONFIG_IP_FIB_TRIE is not set -CONFIG_IP_FIB_HASH=y  CONFIG_IP_MULTIPLE_TABLES=y  CONFIG_IP_ROUTE_MULTIPATH=y  CONFIG_IP_ROUTE_VERBOSE=y @@ -526,118 +86,46 @@ CONFIG_IP_PNP=y  CONFIG_IP_PNP_DHCP=y  CONFIG_IP_PNP_BOOTP=y  CONFIG_IP_PNP_RARP=y -# CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE is not set  CONFIG_IP_MROUTE=y  CONFIG_IP_PIMSM_V1=y  CONFIG_IP_PIMSM_V2=y -# CONFIG_ARPD is not set  CONFIG_SYN_COOKIES=y -# CONFIG_INET_AH is not set -# CONFIG_INET_ESP is not set -# CONFIG_INET_IPCOMP is not set -# CONFIG_INET_XFRM_TUNNEL is not set -CONFIG_INET_TUNNEL=y  # CONFIG_INET_XFRM_MODE_TRANSPORT is not set  # CONFIG_INET_XFRM_MODE_TUNNEL is not set  # CONFIG_INET_XFRM_MODE_BEET is not set -CONFIG_INET_LRO=y  # CONFIG_INET_DIAG is not set 
 CONFIG_TCP_CONG_ADVANCED=y  # CONFIG_TCP_CONG_BIC is not set -CONFIG_TCP_CONG_CUBIC=y  # CONFIG_TCP_CONG_WESTWOOD is not set  # CONFIG_TCP_CONG_HTCP is not set -# CONFIG_TCP_CONG_HSTCP is not set -# CONFIG_TCP_CONG_HYBLA is not set -# CONFIG_TCP_CONG_VEGAS is not set -# CONFIG_TCP_CONG_SCALABLE is not set -# CONFIG_TCP_CONG_LP is not set -# CONFIG_TCP_CONG_VENO is not set -# CONFIG_TCP_CONG_YEAH is not set -# CONFIG_TCP_CONG_ILLINOIS is not set -# CONFIG_DEFAULT_BIC is not set -CONFIG_DEFAULT_CUBIC=y -# CONFIG_DEFAULT_HTCP is not set -# CONFIG_DEFAULT_VEGAS is not set -# CONFIG_DEFAULT_WESTWOOD is not set -# CONFIG_DEFAULT_RENO is not set -CONFIG_DEFAULT_TCP_CONG="cubic"  CONFIG_TCP_MD5SIG=y  CONFIG_IPV6=y -# CONFIG_IPV6_PRIVACY is not set -# CONFIG_IPV6_ROUTER_PREF is not set -# CONFIG_IPV6_OPTIMISTIC_DAD is not set  CONFIG_INET6_AH=y  CONFIG_INET6_ESP=y -# CONFIG_INET6_IPCOMP is not set -# CONFIG_IPV6_MIP6 is not set -# CONFIG_INET6_XFRM_TUNNEL is not set -# CONFIG_INET6_TUNNEL is not set -CONFIG_INET6_XFRM_MODE_TRANSPORT=y -CONFIG_INET6_XFRM_MODE_TUNNEL=y -CONFIG_INET6_XFRM_MODE_BEET=y -# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set -CONFIG_IPV6_SIT=y -CONFIG_IPV6_NDISC_NODETYPE=y -# CONFIG_IPV6_TUNNEL is not set -# CONFIG_IPV6_MULTIPLE_TABLES is not set -# CONFIG_IPV6_MROUTE is not set  CONFIG_NETLABEL=y -CONFIG_NETWORK_SECMARK=y  CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set  # CONFIG_NETFILTER_ADVANCED is not set - -# -# Core Netfilter Configuration -# -CONFIG_NETFILTER_NETLINK=y -CONFIG_NETFILTER_NETLINK_LOG=y  CONFIG_NF_CONNTRACK=y -CONFIG_NF_CONNTRACK_SECMARK=y  CONFIG_NF_CONNTRACK_FTP=y  CONFIG_NF_CONNTRACK_IRC=y  CONFIG_NF_CONNTRACK_SIP=y  CONFIG_NF_CT_NETLINK=y -CONFIG_NETFILTER_XTABLES=y  CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y -CONFIG_NETFILTER_XT_TARGET_MARK=y  CONFIG_NETFILTER_XT_TARGET_NFLOG=y  CONFIG_NETFILTER_XT_TARGET_SECMARK=y  CONFIG_NETFILTER_XT_TARGET_TCPMSS=y  CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y -CONFIG_NETFILTER_XT_MATCH_MARK=y  CONFIG_NETFILTER_XT_MATCH_POLICY=y  CONFIG_NETFILTER_XT_MATCH_STATE=y -# CONFIG_IP_VS is not set - -# -# IP: Netfilter Configuration -# -CONFIG_NF_DEFRAG_IPV4=y  CONFIG_NF_CONNTRACK_IPV4=y -CONFIG_NF_CONNTRACK_PROC_COMPAT=y  CONFIG_IP_NF_IPTABLES=y  CONFIG_IP_NF_FILTER=y  CONFIG_IP_NF_TARGET_REJECT=y  CONFIG_IP_NF_TARGET_LOG=y  CONFIG_IP_NF_TARGET_ULOG=y  CONFIG_NF_NAT=y -CONFIG_NF_NAT_NEEDED=y  CONFIG_IP_NF_TARGET_MASQUERADE=y -CONFIG_NF_NAT_FTP=y -CONFIG_NF_NAT_IRC=y -# CONFIG_NF_NAT_TFTP is not set -# CONFIG_NF_NAT_AMANDA is not set -# CONFIG_NF_NAT_PPTP is not set -# CONFIG_NF_NAT_H323 is not set -CONFIG_NF_NAT_SIP=y  CONFIG_IP_NF_MANGLE=y - -# -# IPv6: Netfilter Configuration -#  CONFIG_NF_CONNTRACK_IPV6=y  CONFIG_IP6_NF_IPTABLES=y  CONFIG_IP6_NF_MATCH_IPV6HEADER=y @@ -645,1228 +133,115 @@ CONFIG_IP6_NF_TARGET_LOG=y  CONFIG_IP6_NF_FILTER=y  CONFIG_IP6_NF_TARGET_REJECT=y  CONFIG_IP6_NF_MANGLE=y -# CONFIG_IP_DCCP is not set -# CONFIG_IP_SCTP is not set -# CONFIG_TIPC is not set -# CONFIG_ATM is not set -# CONFIG_BRIDGE is not set -# CONFIG_NET_DSA is not set -# CONFIG_VLAN_8021Q is not set -# CONFIG_DECNET is not set -CONFIG_LLC=y -# CONFIG_LLC2 is not set -# CONFIG_IPX is not set -# CONFIG_ATALK is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set -# CONFIG_PHONET is not set  CONFIG_NET_SCHED=y - -# -# Queueing/Scheduling -# -# CONFIG_NET_SCH_CBQ is not set -# CONFIG_NET_SCH_HTB is not set -# CONFIG_NET_SCH_HFSC is not set -# CONFIG_NET_SCH_PRIO 
is not set -# CONFIG_NET_SCH_MULTIQ is not set -# CONFIG_NET_SCH_RED is not set -# CONFIG_NET_SCH_SFQ is not set -# CONFIG_NET_SCH_TEQL is not set -# CONFIG_NET_SCH_TBF is not set -# CONFIG_NET_SCH_GRED is not set -# CONFIG_NET_SCH_DSMARK is not set -# CONFIG_NET_SCH_NETEM is not set -# CONFIG_NET_SCH_DRR is not set -# CONFIG_NET_SCH_INGRESS is not set - -# -# Classification -# -CONFIG_NET_CLS=y -# CONFIG_NET_CLS_BASIC is not set -# CONFIG_NET_CLS_TCINDEX is not set -# CONFIG_NET_CLS_ROUTE4 is not set -# CONFIG_NET_CLS_FW is not set -# CONFIG_NET_CLS_U32 is not set -# CONFIG_NET_CLS_RSVP is not set -# CONFIG_NET_CLS_RSVP6 is not set -# CONFIG_NET_CLS_FLOW is not set -# CONFIG_NET_CLS_CGROUP is not set  CONFIG_NET_EMATCH=y -CONFIG_NET_EMATCH_STACK=32 -# CONFIG_NET_EMATCH_CMP is not set -# CONFIG_NET_EMATCH_NBYTE is not set -# CONFIG_NET_EMATCH_U32 is not set -# CONFIG_NET_EMATCH_META is not set -# CONFIG_NET_EMATCH_TEXT is not set  CONFIG_NET_CLS_ACT=y -# CONFIG_NET_ACT_POLICE is not set -# CONFIG_NET_ACT_GACT is not set -# CONFIG_NET_ACT_MIRRED is not set -# CONFIG_NET_ACT_IPT is not set -# CONFIG_NET_ACT_NAT is not set -# CONFIG_NET_ACT_PEDIT is not set -# CONFIG_NET_ACT_SIMP is not set -# CONFIG_NET_ACT_SKBEDIT is not set -CONFIG_NET_SCH_FIFO=y -# CONFIG_DCB is not set - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set -# CONFIG_NET_TCPPROBE is not set -# CONFIG_NET_DROP_MONITOR is not set  CONFIG_HAMRADIO=y - -# -# Packet Radio protocols -# -# CONFIG_AX25 is not set -# CONFIG_CAN is not set -# CONFIG_IRDA is not set -# CONFIG_BT is not set -# CONFIG_AF_RXRPC is not set -CONFIG_FIB_RULES=y -CONFIG_WIRELESS=y  CONFIG_CFG80211=y -# CONFIG_CFG80211_REG_DEBUG is not set -CONFIG_WIRELESS_OLD_REGULATORY=y -CONFIG_WIRELESS_EXT=y -CONFIG_WIRELESS_EXT_SYSFS=y -# CONFIG_LIB80211 is not set  CONFIG_MAC80211=y - -# -# Rate control algorithm selection -# -CONFIG_MAC80211_RC_MINSTREL=y -# CONFIG_MAC80211_RC_DEFAULT_PID is not set -CONFIG_MAC80211_RC_DEFAULT_MINSTREL=y -CONFIG_MAC80211_RC_DEFAULT="minstrel" -# CONFIG_MAC80211_MESH is not set  CONFIG_MAC80211_LEDS=y -# CONFIG_MAC80211_DEBUGFS is not set -# CONFIG_MAC80211_DEBUG_MENU is not set -# CONFIG_WIMAX is not set  CONFIG_RFKILL=y -# CONFIG_RFKILL_INPUT is not set -CONFIG_RFKILL_LEDS=y -# CONFIG_NET_9P is not set - -# -# Device Drivers -# - -# -# Generic Driver Options -#  CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -CONFIG_STANDALONE=y -CONFIG_PREVENT_FIRMWARE_BUILD=y -CONFIG_FW_LOADER=y -CONFIG_FIRMWARE_IN_KERNEL=y -CONFIG_EXTRA_FIRMWARE="" -# CONFIG_DEBUG_DRIVER is not set  CONFIG_DEBUG_DEVRES=y -# CONFIG_SYS_HYPERVISOR is not set  CONFIG_CONNECTOR=y -CONFIG_PROC_EVENTS=y -# CONFIG_MTD is not set -# CONFIG_PARPORT is not set -CONFIG_PNP=y -CONFIG_PNP_DEBUG_MESSAGES=y - -# -# Protocols -# -CONFIG_PNPACPI=y -CONFIG_BLK_DEV=y -# CONFIG_BLK_DEV_FD is not set -# CONFIG_BLK_CPQ_DA is not set -# CONFIG_BLK_CPQ_CISS_DA is not set -# CONFIG_BLK_DEV_DAC960 is not set -# CONFIG_BLK_DEV_UMEM is not set -# CONFIG_BLK_DEV_COW_COMMON is not set  CONFIG_BLK_DEV_LOOP=y -# CONFIG_BLK_DEV_CRYPTOLOOP is not set -# CONFIG_BLK_DEV_NBD is not set -# CONFIG_BLK_DEV_SX8 is not set -# CONFIG_BLK_DEV_UB is not set  CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_COUNT=16  CONFIG_BLK_DEV_RAM_SIZE=16384 -# CONFIG_BLK_DEV_XIP is not set -# CONFIG_CDROM_PKTCDVD is not set -# CONFIG_ATA_OVER_ETH is not set -# CONFIG_BLK_DEV_HD is not set -CONFIG_MISC_DEVICES=y -# CONFIG_IBM_ASM is not set -# CONFIG_PHANTOM is not set -# CONFIG_SGI_IOC4 is not set -# CONFIG_TIFM_CORE is not set 
-# CONFIG_ICS932S401 is not set -# CONFIG_ENCLOSURE_SERVICES is not set -# CONFIG_HP_ILO is not set -# CONFIG_ISL29003 is not set -# CONFIG_C2PORT is not set - -# -# EEPROM support -# -# CONFIG_EEPROM_AT24 is not set -# CONFIG_EEPROM_LEGACY is not set -# CONFIG_EEPROM_93CX6 is not set -CONFIG_HAVE_IDE=y -# CONFIG_IDE is not set - -# -# SCSI device support -# -# CONFIG_RAID_ATTRS is not set -CONFIG_SCSI=y -CONFIG_SCSI_DMA=y -# CONFIG_SCSI_TGT is not set -# CONFIG_SCSI_NETLINK is not set -CONFIG_SCSI_PROC_FS=y - -# -# SCSI support type (disk, tape, CD-ROM) -#  CONFIG_BLK_DEV_SD=y -# CONFIG_CHR_DEV_ST is not set -# CONFIG_CHR_DEV_OSST is not set  CONFIG_BLK_DEV_SR=y  CONFIG_BLK_DEV_SR_VENDOR=y  CONFIG_CHR_DEV_SG=y -# CONFIG_CHR_DEV_SCH is not set - -# -# Some SCSI devices (e.g. CD jukebox) support multiple LUNs -# -# CONFIG_SCSI_MULTI_LUN is not set  CONFIG_SCSI_CONSTANTS=y -# CONFIG_SCSI_LOGGING is not set -# CONFIG_SCSI_SCAN_ASYNC is not set -CONFIG_SCSI_WAIT_SCAN=m - -# -# SCSI Transports -#  CONFIG_SCSI_SPI_ATTRS=y -# CONFIG_SCSI_FC_ATTRS is not set -# CONFIG_SCSI_ISCSI_ATTRS is not set -# CONFIG_SCSI_SAS_ATTRS is not set -# CONFIG_SCSI_SAS_LIBSAS is not set -# CONFIG_SCSI_SRP_ATTRS is not set  # CONFIG_SCSI_LOWLEVEL is not set -# CONFIG_SCSI_LOWLEVEL_PCMCIA is not set -# CONFIG_SCSI_DH is not set -# CONFIG_SCSI_OSD_INITIATOR is not set  CONFIG_ATA=y -# CONFIG_ATA_NONSTANDARD is not set -CONFIG_ATA_ACPI=y -CONFIG_SATA_PMP=y  CONFIG_SATA_AHCI=y -# CONFIG_SATA_SIL24 is not set -CONFIG_ATA_SFF=y -# CONFIG_SATA_SVW is not set  CONFIG_ATA_PIIX=y -# CONFIG_SATA_MV is not set -# CONFIG_SATA_NV is not set -# CONFIG_PDC_ADMA is not set -# CONFIG_SATA_QSTOR is not set -# CONFIG_SATA_PROMISE is not set -# CONFIG_SATA_SX4 is not set -# CONFIG_SATA_SIL is not set -# CONFIG_SATA_SIS is not set -# CONFIG_SATA_ULI is not set -# CONFIG_SATA_VIA is not set -# CONFIG_SATA_VITESSE is not set -# CONFIG_SATA_INIC162X is not set -# CONFIG_PATA_ACPI is not set -# CONFIG_PATA_ALI is not set  CONFIG_PATA_AMD=y -# CONFIG_PATA_ARTOP is not set -# CONFIG_PATA_ATIIXP is not set -# CONFIG_PATA_CMD640_PCI is not set -# CONFIG_PATA_CMD64X is not set -# CONFIG_PATA_CS5520 is not set -# CONFIG_PATA_CS5530 is not set -# CONFIG_PATA_CS5535 is not set -# CONFIG_PATA_CS5536 is not set -# CONFIG_PATA_CYPRESS is not set -# CONFIG_PATA_EFAR is not set -CONFIG_ATA_GENERIC=y -# CONFIG_PATA_HPT366 is not set -# CONFIG_PATA_HPT37X is not set -# CONFIG_PATA_HPT3X2N is not set -# CONFIG_PATA_HPT3X3 is not set -# CONFIG_PATA_IT821X is not set -# CONFIG_PATA_IT8213 is not set -# CONFIG_PATA_JMICRON is not set -# CONFIG_PATA_TRIFLEX is not set -# CONFIG_PATA_MARVELL is not set -CONFIG_PATA_MPIIX=y  CONFIG_PATA_OLDPIIX=y -# CONFIG_PATA_NETCELL is not set -# CONFIG_PATA_NINJA32 is not set -# CONFIG_PATA_NS87410 is not set -# CONFIG_PATA_NS87415 is not set -# CONFIG_PATA_OPTI is not set -# CONFIG_PATA_OPTIDMA is not set -# CONFIG_PATA_PCMCIA is not set -# CONFIG_PATA_PDC_OLD is not set -# CONFIG_PATA_RADISYS is not set -# CONFIG_PATA_RZ1000 is not set -# CONFIG_PATA_SC1200 is not set -# CONFIG_PATA_SERVERWORKS is not set -# CONFIG_PATA_PDC2027X is not set -# CONFIG_PATA_SIL680 is not set -# CONFIG_PATA_SIS is not set -# CONFIG_PATA_VIA is not set -# CONFIG_PATA_WINBOND is not set  CONFIG_PATA_SCH=y +CONFIG_PATA_MPIIX=y +CONFIG_ATA_GENERIC=y  CONFIG_MD=y  CONFIG_BLK_DEV_MD=y -CONFIG_MD_AUTODETECT=y -# CONFIG_MD_LINEAR is not set -# CONFIG_MD_RAID0 is not set -# CONFIG_MD_RAID1 is not set -# CONFIG_MD_RAID10 is not set -# CONFIG_MD_RAID456 is 
not set -# CONFIG_MD_MULTIPATH is not set -# CONFIG_MD_FAULTY is not set  CONFIG_BLK_DEV_DM=y -# CONFIG_DM_DEBUG is not set -# CONFIG_DM_CRYPT is not set -# CONFIG_DM_SNAPSHOT is not set  CONFIG_DM_MIRROR=y  CONFIG_DM_ZERO=y -# CONFIG_DM_MULTIPATH is not set -# CONFIG_DM_DELAY is not set -# CONFIG_DM_UEVENT is not set -# CONFIG_FUSION is not set - -# -# IEEE 1394 (FireWire) support -# - -# -# Enable only one of the two stacks, unless you know what you are doing -# -# CONFIG_FIREWIRE is not set -# CONFIG_IEEE1394 is not set -# CONFIG_I2O is not set  CONFIG_MACINTOSH_DRIVERS=y  CONFIG_MAC_EMUMOUSEBTN=y  CONFIG_NETDEVICES=y -CONFIG_COMPAT_NET_DEV_OPS=y -# CONFIG_IFB is not set -# CONFIG_DUMMY is not set -# CONFIG_BONDING is not set -# CONFIG_MACVLAN is not set -# CONFIG_EQUALIZER is not set -# CONFIG_TUN is not set -# CONFIG_VETH is not set -# CONFIG_NET_SB1000 is not set -# CONFIG_ARCNET is not set -CONFIG_PHYLIB=y - -# -# MII PHY device drivers -# -# CONFIG_MARVELL_PHY is not set -# CONFIG_DAVICOM_PHY is not set -# CONFIG_QSEMI_PHY is not set -# CONFIG_LXT_PHY is not set -# CONFIG_CICADA_PHY is not set -# CONFIG_VITESSE_PHY is not set -# CONFIG_SMSC_PHY is not set -# CONFIG_BROADCOM_PHY is not set -# CONFIG_ICPLUS_PHY is not set -# CONFIG_REALTEK_PHY is not set -# CONFIG_NATIONAL_PHY is not set -# CONFIG_STE10XP is not set -# CONFIG_LSI_ET1011C_PHY is not set -# CONFIG_FIXED_PHY is not set -# CONFIG_MDIO_BITBANG is not set  CONFIG_NET_ETHERNET=y -CONFIG_MII=y -# CONFIG_HAPPYMEAL is not set -# CONFIG_SUNGEM is not set -# CONFIG_CASSINI is not set  CONFIG_NET_VENDOR_3COM=y -# CONFIG_VORTEX is not set -# CONFIG_TYPHOON is not set -# CONFIG_ETHOC is not set -# CONFIG_DNET is not set  CONFIG_NET_TULIP=y -# CONFIG_DE2104X is not set -# CONFIG_TULIP is not set -# CONFIG_DE4X5 is not set -# CONFIG_WINBOND_840 is not set -# CONFIG_DM9102 is not set -# CONFIG_ULI526X is not set -# CONFIG_PCMCIA_XIRCOM is not set -# CONFIG_HP100 is not set -# CONFIG_IBM_NEW_EMAC_ZMII is not set -# CONFIG_IBM_NEW_EMAC_RGMII is not set -# CONFIG_IBM_NEW_EMAC_TAH is not set -# CONFIG_IBM_NEW_EMAC_EMAC4 is not set -# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set -# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set -# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set  CONFIG_NET_PCI=y -# CONFIG_PCNET32 is not set -# CONFIG_AMD8111_ETH is not set -# CONFIG_ADAPTEC_STARFIRE is not set -# CONFIG_B44 is not set  CONFIG_FORCEDETH=y -# CONFIG_FORCEDETH_NAPI is not set  CONFIG_E100=y -# CONFIG_FEALNX is not set -# CONFIG_NATSEMI is not set  CONFIG_NE2K_PCI=y -# CONFIG_8139CP is not set  CONFIG_8139TOO=y  # CONFIG_8139TOO_PIO is not set -# CONFIG_8139TOO_TUNE_TWISTER is not set -# CONFIG_8139TOO_8129 is not set -# CONFIG_8139_OLD_RX_RESET is not set -# CONFIG_R6040 is not set -# CONFIG_SIS900 is not set -# CONFIG_EPIC100 is not set -# CONFIG_SMSC9420 is not set -# CONFIG_SUNDANCE is not set -# CONFIG_TLAN is not set -# CONFIG_VIA_RHINE is not set -# CONFIG_SC92031 is not set -# CONFIG_ATL2 is not set -CONFIG_NETDEV_1000=y -# CONFIG_ACENIC is not set -# CONFIG_DL2K is not set  CONFIG_E1000=y  CONFIG_E1000E=y -# CONFIG_IP1000 is not set -# CONFIG_IGB is not set -# CONFIG_IGBVF is not set -# CONFIG_NS83820 is not set -# CONFIG_HAMACHI is not set -# CONFIG_YELLOWFIN is not set  CONFIG_R8169=y -# CONFIG_SIS190 is not set -# CONFIG_SKGE is not set  CONFIG_SKY2=y -# CONFIG_SKY2_DEBUG is not set -# CONFIG_VIA_VELOCITY is not set  CONFIG_TIGON3=y  CONFIG_BNX2=y -# CONFIG_QLA3XXX is not set -# CONFIG_ATL1 is not set -# CONFIG_ATL1E is not set -# 
CONFIG_ATL1C is not set -# CONFIG_JME is not set -CONFIG_NETDEV_10000=y -# CONFIG_CHELSIO_T1 is not set -CONFIG_CHELSIO_T3_DEPENDS=y -# CONFIG_CHELSIO_T3 is not set -# CONFIG_ENIC is not set -# CONFIG_IXGBE is not set -# CONFIG_IXGB is not set -# CONFIG_S2IO is not set -# CONFIG_VXGE is not set -# CONFIG_MYRI10GE is not set -# CONFIG_NETXEN_NIC is not set -# CONFIG_NIU is not set -# CONFIG_MLX4_EN is not set -# CONFIG_MLX4_CORE is not set -# CONFIG_TEHUTI is not set -# CONFIG_BNX2X is not set -# CONFIG_QLGE is not set -# CONFIG_SFC is not set -# CONFIG_BE2NET is not set  CONFIG_TR=y -# CONFIG_IBMOL is not set -# CONFIG_IBMLS is not set -# CONFIG_3C359 is not set -# CONFIG_TMS380TR is not set - -# -# Wireless LAN -# -# CONFIG_WLAN_PRE80211 is not set -CONFIG_WLAN_80211=y -# CONFIG_PCMCIA_RAYCS is not set -# CONFIG_LIBERTAS is not set -# CONFIG_LIBERTAS_THINFIRM is not set -# CONFIG_AIRO is not set -# CONFIG_ATMEL is not set -# CONFIG_AT76C50X_USB is not set -# CONFIG_AIRO_CS is not set -# CONFIG_PCMCIA_WL3501 is not set -# CONFIG_PRISM54 is not set -# CONFIG_USB_ZD1201 is not set -# CONFIG_USB_NET_RNDIS_WLAN is not set -# CONFIG_RTL8180 is not set -# CONFIG_RTL8187 is not set -# CONFIG_ADM8211 is not set -# CONFIG_MAC80211_HWSIM is not set -# CONFIG_MWL8K is not set -# CONFIG_P54_COMMON is not set -CONFIG_ATH5K=y -# CONFIG_ATH5K_DEBUG is not set -# CONFIG_ATH9K is not set -# CONFIG_AR9170_USB is not set -# CONFIG_IPW2100 is not set -# CONFIG_IPW2200 is not set -# CONFIG_IWLWIFI is not set -# CONFIG_HOSTAP is not set -# CONFIG_B43 is not set -# CONFIG_B43LEGACY is not set -# CONFIG_ZD1211RW is not set -# CONFIG_RT2X00 is not set -# CONFIG_HERMES is not set - -# -# Enable WiMAX (Networking options) to see the WiMAX drivers -# - -# -# USB Network Adapters -# -# CONFIG_USB_CATC is not set -# CONFIG_USB_KAWETH is not set -# CONFIG_USB_PEGASUS is not set -# CONFIG_USB_RTL8150 is not set -# CONFIG_USB_USBNET is not set -# CONFIG_USB_HSO is not set  CONFIG_NET_PCMCIA=y -# CONFIG_PCMCIA_3C589 is not set -# CONFIG_PCMCIA_3C574 is not set -# CONFIG_PCMCIA_FMVJ18X is not set -# CONFIG_PCMCIA_PCNET is not set -# CONFIG_PCMCIA_NMCLAN is not set -# CONFIG_PCMCIA_SMC91C92 is not set -# CONFIG_PCMCIA_XIRC2PS is not set -# CONFIG_PCMCIA_AXNET is not set -# CONFIG_PCMCIA_IBMTR is not set -# CONFIG_WAN is not set  CONFIG_FDDI=y -# CONFIG_DEFXX is not set -# CONFIG_SKFP is not set -# CONFIG_HIPPI is not set -# CONFIG_PPP is not set -# CONFIG_SLIP is not set -# CONFIG_NET_FC is not set  CONFIG_NETCONSOLE=y -# CONFIG_NETCONSOLE_DYNAMIC is not set -CONFIG_NETPOLL=y -# CONFIG_NETPOLL_TRAP is not set -CONFIG_NET_POLL_CONTROLLER=y -# CONFIG_ISDN is not set -# CONFIG_PHONE is not set - -# -# Input device support -# -CONFIG_INPUT=y -CONFIG_INPUT_FF_MEMLESS=y  CONFIG_INPUT_POLLDEV=y - -# -# Userland interfaces -# -CONFIG_INPUT_MOUSEDEV=y  # CONFIG_INPUT_MOUSEDEV_PSAUX is not set -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -# CONFIG_INPUT_JOYDEV is not set  CONFIG_INPUT_EVDEV=y -# CONFIG_INPUT_EVBUG is not set - -# -# Input Device Drivers -# -CONFIG_INPUT_KEYBOARD=y -CONFIG_KEYBOARD_ATKBD=y -# CONFIG_KEYBOARD_SUNKBD is not set -# CONFIG_KEYBOARD_LKKBD is not set -# CONFIG_KEYBOARD_XTKBD is not set -# CONFIG_KEYBOARD_NEWTON is not set -# CONFIG_KEYBOARD_STOWAWAY is not set -CONFIG_INPUT_MOUSE=y -CONFIG_MOUSE_PS2=y -CONFIG_MOUSE_PS2_ALPS=y -CONFIG_MOUSE_PS2_LOGIPS2PP=y -CONFIG_MOUSE_PS2_SYNAPTICS=y -CONFIG_MOUSE_PS2_LIFEBOOK=y -CONFIG_MOUSE_PS2_TRACKPOINT=y -# CONFIG_MOUSE_PS2_ELANTECH is not 
set -# CONFIG_MOUSE_PS2_TOUCHKIT is not set -# CONFIG_MOUSE_SERIAL is not set -# CONFIG_MOUSE_APPLETOUCH is not set -# CONFIG_MOUSE_BCM5974 is not set -# CONFIG_MOUSE_VSXXXAA is not set  CONFIG_INPUT_JOYSTICK=y -# CONFIG_JOYSTICK_ANALOG is not set -# CONFIG_JOYSTICK_A3D is not set -# CONFIG_JOYSTICK_ADI is not set -# CONFIG_JOYSTICK_COBRA is not set -# CONFIG_JOYSTICK_GF2K is not set -# CONFIG_JOYSTICK_GRIP is not set -# CONFIG_JOYSTICK_GRIP_MP is not set -# CONFIG_JOYSTICK_GUILLEMOT is not set -# CONFIG_JOYSTICK_INTERACT is not set -# CONFIG_JOYSTICK_SIDEWINDER is not set -# CONFIG_JOYSTICK_TMDC is not set -# CONFIG_JOYSTICK_IFORCE is not set -# CONFIG_JOYSTICK_WARRIOR is not set -# CONFIG_JOYSTICK_MAGELLAN is not set -# CONFIG_JOYSTICK_SPACEORB is not set -# CONFIG_JOYSTICK_SPACEBALL is not set -# CONFIG_JOYSTICK_STINGER is not set -# CONFIG_JOYSTICK_TWIDJOY is not set -# CONFIG_JOYSTICK_ZHENHUA is not set -# CONFIG_JOYSTICK_JOYDUMP is not set -# CONFIG_JOYSTICK_XPAD is not set  CONFIG_INPUT_TABLET=y -# CONFIG_TABLET_USB_ACECAD is not set -# CONFIG_TABLET_USB_AIPTEK is not set -# CONFIG_TABLET_USB_GTCO is not set -# CONFIG_TABLET_USB_KBTAB is not set -# CONFIG_TABLET_USB_WACOM is not set  CONFIG_INPUT_TOUCHSCREEN=y -# CONFIG_TOUCHSCREEN_AD7879_I2C is not set -# CONFIG_TOUCHSCREEN_AD7879 is not set -# CONFIG_TOUCHSCREEN_FUJITSU is not set -# CONFIG_TOUCHSCREEN_GUNZE is not set -# CONFIG_TOUCHSCREEN_ELO is not set -# CONFIG_TOUCHSCREEN_WACOM_W8001 is not set -# CONFIG_TOUCHSCREEN_MTOUCH is not set -# CONFIG_TOUCHSCREEN_INEXIO is not set -# CONFIG_TOUCHSCREEN_MK712 is not set -# CONFIG_TOUCHSCREEN_PENMOUNT is not set -# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set -# CONFIG_TOUCHSCREEN_TOUCHWIN is not set -# CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set -# CONFIG_TOUCHSCREEN_TOUCHIT213 is not set -# CONFIG_TOUCHSCREEN_TSC2007 is not set  CONFIG_INPUT_MISC=y -# CONFIG_INPUT_PCSPKR is not set -# CONFIG_INPUT_APANEL is not set -# CONFIG_INPUT_WISTRON_BTNS is not set -# CONFIG_INPUT_ATLAS_BTNS is not set -# CONFIG_INPUT_ATI_REMOTE is not set -# CONFIG_INPUT_ATI_REMOTE2 is not set -# CONFIG_INPUT_KEYSPAN_REMOTE is not set -# CONFIG_INPUT_POWERMATE is not set -# CONFIG_INPUT_YEALINK is not set -# CONFIG_INPUT_CM109 is not set -# CONFIG_INPUT_UINPUT is not set - -# -# Hardware I/O ports -# -CONFIG_SERIO=y -CONFIG_SERIO_I8042=y -CONFIG_SERIO_SERPORT=y -# CONFIG_SERIO_CT82C710 is not set -# CONFIG_SERIO_PCIPS2 is not set -CONFIG_SERIO_LIBPS2=y -# CONFIG_SERIO_RAW is not set -# CONFIG_GAMEPORT is not set - -# -# Character devices -# -CONFIG_VT=y -CONFIG_CONSOLE_TRANSLATIONS=y -CONFIG_VT_CONSOLE=y -CONFIG_HW_CONSOLE=y  CONFIG_VT_HW_CONSOLE_BINDING=y -CONFIG_DEVKMEM=y  CONFIG_SERIAL_NONSTANDARD=y -# CONFIG_COMPUTONE is not set -# CONFIG_ROCKETPORT is not set -# CONFIG_CYCLADES is not set -# CONFIG_DIGIEPCA is not set -# CONFIG_MOXA_INTELLIO is not set -# CONFIG_MOXA_SMARTIO is not set -# CONFIG_ISI is not set -# CONFIG_SYNCLINK is not set -# CONFIG_SYNCLINKMP is not set -# CONFIG_SYNCLINK_GT is not set -# CONFIG_N_HDLC is not set -# CONFIG_RISCOM8 is not set -# CONFIG_SPECIALIX is not set -# CONFIG_SX is not set -# CONFIG_RIO is not set -# CONFIG_STALDRV is not set -# CONFIG_NOZOMI is not set - -# -# Serial drivers -#  CONFIG_SERIAL_8250=y  CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_FIX_EARLYCON_MEM=y -CONFIG_SERIAL_8250_PCI=y -CONFIG_SERIAL_8250_PNP=y -# CONFIG_SERIAL_8250_CS is not set  CONFIG_SERIAL_8250_NR_UARTS=32 -CONFIG_SERIAL_8250_RUNTIME_UARTS=4  CONFIG_SERIAL_8250_EXTENDED=y  
 CONFIG_SERIAL_8250_MANY_PORTS=y
 CONFIG_SERIAL_8250_SHARE_IRQ=y
 CONFIG_SERIAL_8250_DETECT_IRQ=y
 CONFIG_SERIAL_8250_RSA=y
-
-#
-# Non-8250 serial port support
-#
-CONFIG_SERIAL_CORE=y
-CONFIG_SERIAL_CORE_CONSOLE=y
-# CONFIG_SERIAL_JSM is not set
-CONFIG_UNIX98_PTYS=y
-# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
 # CONFIG_LEGACY_PTYS is not set
-# CONFIG_IPMI_HANDLER is not set
 CONFIG_HW_RANDOM=y
-# CONFIG_HW_RANDOM_TIMERIOMEM is not set
-CONFIG_HW_RANDOM_INTEL=y
-CONFIG_HW_RANDOM_AMD=y
-CONFIG_HW_RANDOM_GEODE=y
-CONFIG_HW_RANDOM_VIA=y
 CONFIG_NVRAM=y
-# CONFIG_R3964 is not set
-# CONFIG_APPLICOM is not set
-# CONFIG_SONYPI is not set
-
-#
-# PCMCIA character devices
-#
-# CONFIG_SYNCLINK_CS is not set
-# CONFIG_CARDMAN_4000 is not set
-# CONFIG_CARDMAN_4040 is not set
-# CONFIG_IPWIRELESS is not set
-# CONFIG_MWAVE is not set
-# CONFIG_PC8736x_GPIO is not set
-# CONFIG_NSC_GPIO is not set
-# CONFIG_CS5535_GPIO is not set
-# CONFIG_RAW_DRIVER is not set
 CONFIG_HPET=y
 # CONFIG_HPET_MMAP is not set
-# CONFIG_HANGCHECK_TIMER is not set
-# CONFIG_TCG_TPM is not set
-# CONFIG_TELCLOCK is not set
-CONFIG_DEVPORT=y
-CONFIG_I2C=y
-CONFIG_I2C_BOARDINFO=y
-# CONFIG_I2C_CHARDEV is not set
-CONFIG_I2C_HELPER_AUTO=y
-CONFIG_I2C_ALGOBIT=y
-
-#
-# I2C Hardware Bus support
-#
-
-#
-# PC SMBus host controller drivers
-#
-# CONFIG_I2C_ALI1535 is not set
-# CONFIG_I2C_ALI1563 is not set
-# CONFIG_I2C_ALI15X3 is not set
-# CONFIG_I2C_AMD756 is not set
-# CONFIG_I2C_AMD8111 is not set
 CONFIG_I2C_I801=y
-# CONFIG_I2C_ISCH is not set
-# CONFIG_I2C_PIIX4 is not set
-# CONFIG_I2C_NFORCE2 is not set
-# CONFIG_I2C_SIS5595 is not set
-# CONFIG_I2C_SIS630 is not set
-# CONFIG_I2C_SIS96X is not set
-# CONFIG_I2C_VIA is not set
-# CONFIG_I2C_VIAPRO is not set
-
-#
-# I2C system bus drivers (mostly embedded / system-on-chip)
-#
-# CONFIG_I2C_OCORES is not set
-# CONFIG_I2C_SIMTEC is not set
-
-#
-# External I2C/SMBus adapter drivers
-#
-# CONFIG_I2C_PARPORT_LIGHT is not set
-# CONFIG_I2C_TAOS_EVM is not set
-# CONFIG_I2C_TINY_USB is not set
-
-#
-# Graphics adapter I2C/DDC channel drivers
-#
-# CONFIG_I2C_VOODOO3 is not set
-
-#
-# Other I2C/SMBus bus drivers
-#
-# CONFIG_I2C_PCA_PLATFORM is not set
-# CONFIG_I2C_STUB is not set
-# CONFIG_SCx200_ACB is not set
-
-#
-# Miscellaneous I2C Chip support
-#
-# CONFIG_DS1682 is not set
-# CONFIG_SENSORS_PCF8574 is not set
-# CONFIG_PCF8575 is not set
-# CONFIG_SENSORS_PCA9539 is not set
-# CONFIG_SENSORS_MAX6875 is not set
-# CONFIG_SENSORS_TSL2550 is not set
-# CONFIG_I2C_DEBUG_CORE is not set
-# CONFIG_I2C_DEBUG_ALGO is not set
-# CONFIG_I2C_DEBUG_BUS is not set
-# CONFIG_I2C_DEBUG_CHIP is not set
-# CONFIG_SPI is not set
-CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
-# CONFIG_GPIOLIB is not set
-# CONFIG_W1 is not set
-CONFIG_POWER_SUPPLY=y
-# CONFIG_POWER_SUPPLY_DEBUG is not set
-# CONFIG_PDA_POWER is not set
-# CONFIG_BATTERY_DS2760 is not set
-# CONFIG_BATTERY_BQ27x00 is not set
-CONFIG_HWMON=y
-# CONFIG_HWMON_VID is not set
-# CONFIG_SENSORS_ABITUGURU is not set
-# CONFIG_SENSORS_ABITUGURU3 is not set
-# CONFIG_SENSORS_AD7414 is not set
-# CONFIG_SENSORS_AD7418 is not set
-# CONFIG_SENSORS_ADM1021 is not set
-# CONFIG_SENSORS_ADM1025 is not set
-# CONFIG_SENSORS_ADM1026 is not set
-# CONFIG_SENSORS_ADM1029 is not set
-# CONFIG_SENSORS_ADM1031 is not set
-# CONFIG_SENSORS_ADM9240 is not set
-# CONFIG_SENSORS_ADT7462 is not set
-# CONFIG_SENSORS_ADT7470 is not set
-# CONFIG_SENSORS_ADT7473 is not set
-# CONFIG_SENSORS_ADT7475 is not set
-# CONFIG_SENSORS_K8TEMP is not set
-# CONFIG_SENSORS_ASB100 is not set
-# CONFIG_SENSORS_ATK0110 is not set
-# CONFIG_SENSORS_ATXP1 is not set
-# CONFIG_SENSORS_DS1621 is not set
-# CONFIG_SENSORS_I5K_AMB is not set
-# CONFIG_SENSORS_F71805F is not set
-# CONFIG_SENSORS_F71882FG is not set
-# CONFIG_SENSORS_F75375S is not set
-# CONFIG_SENSORS_FSCHER is not set
-# CONFIG_SENSORS_FSCPOS is not set
-# CONFIG_SENSORS_FSCHMD is not set
-# CONFIG_SENSORS_G760A is not set
-# CONFIG_SENSORS_GL518SM is not set
-# CONFIG_SENSORS_GL520SM is not set
-# CONFIG_SENSORS_CORETEMP is not set
-# CONFIG_SENSORS_IT87 is not set
-# CONFIG_SENSORS_LM63 is not set
-# CONFIG_SENSORS_LM75 is not set
-# CONFIG_SENSORS_LM77 is not set
-# CONFIG_SENSORS_LM78 is not set
-# CONFIG_SENSORS_LM80 is not set
-# CONFIG_SENSORS_LM83 is not set
-# CONFIG_SENSORS_LM85 is not set
-# CONFIG_SENSORS_LM87 is not set
-# CONFIG_SENSORS_LM90 is not set
-# CONFIG_SENSORS_LM92 is not set
-# CONFIG_SENSORS_LM93 is not set
-# CONFIG_SENSORS_LTC4215 is not set
-# CONFIG_SENSORS_LTC4245 is not set
-# CONFIG_SENSORS_LM95241 is not set
-# CONFIG_SENSORS_MAX1619 is not set
-# CONFIG_SENSORS_MAX6650 is not set
-# CONFIG_SENSORS_PC87360 is not set
-# CONFIG_SENSORS_PC87427 is not set
-# CONFIG_SENSORS_PCF8591 is not set
-# CONFIG_SENSORS_SIS5595 is not set
-# CONFIG_SENSORS_DME1737 is not set
-# CONFIG_SENSORS_SMSC47M1 is not set
-# CONFIG_SENSORS_SMSC47M192 is not set
-# CONFIG_SENSORS_SMSC47B397 is not set
-# CONFIG_SENSORS_ADS7828 is not set
-# CONFIG_SENSORS_THMC50 is not set
-# CONFIG_SENSORS_VIA686A is not set
-# CONFIG_SENSORS_VT1211 is not set
-# CONFIG_SENSORS_VT8231 is not set
-# CONFIG_SENSORS_W83781D is not set
-# CONFIG_SENSORS_W83791D is not set
-# CONFIG_SENSORS_W83792D is not set
-# CONFIG_SENSORS_W83793 is not set
-# CONFIG_SENSORS_W83L785TS is not set
-# CONFIG_SENSORS_W83L786NG is not set
-# CONFIG_SENSORS_W83627HF is not set
-# CONFIG_SENSORS_W83627EHF is not set
-# CONFIG_SENSORS_HDAPS is not set
-# CONFIG_SENSORS_LIS3LV02D is not set
-# CONFIG_SENSORS_APPLESMC is not set
-# CONFIG_HWMON_DEBUG_CHIP is not set
-CONFIG_THERMAL=y
-# CONFIG_THERMAL_HWMON is not set
 CONFIG_WATCHDOG=y
-# CONFIG_WATCHDOG_NOWAYOUT is not set
-
-#
-# Watchdog Device Drivers
-#
-# CONFIG_SOFT_WATCHDOG is not set
-# CONFIG_ACQUIRE_WDT is not set
-# CONFIG_ADVANTECH_WDT is not set
-# CONFIG_ALIM1535_WDT is not set
-# CONFIG_ALIM7101_WDT is not set
-# CONFIG_SC520_WDT is not set
-# CONFIG_EUROTECH_WDT is not set
-# CONFIG_IB700_WDT is not set
-# CONFIG_IBMASR is not set
-# CONFIG_WAFER_WDT is not set
-# CONFIG_I6300ESB_WDT is not set
-# CONFIG_ITCO_WDT is not set
-# CONFIG_IT8712F_WDT is not set
-# CONFIG_IT87_WDT is not set
-# CONFIG_HP_WATCHDOG is not set
-# CONFIG_SC1200_WDT is not set
-# CONFIG_PC87413_WDT is not set
-# CONFIG_60XX_WDT is not set
-# CONFIG_SBC8360_WDT is not set
-# CONFIG_SBC7240_WDT is not set
-# CONFIG_CPU5_WDT is not set
-# CONFIG_SMSC_SCH311X_WDT is not set
-# CONFIG_SMSC37B787_WDT is not set
-# CONFIG_W83627HF_WDT is not set
-# CONFIG_W83697HF_WDT is not set
-# CONFIG_W83697UG_WDT is not set
-# CONFIG_W83877F_WDT is not set
-# CONFIG_W83977F_WDT is not set
-# CONFIG_MACHZ_WDT is not set
-# CONFIG_SBC_EPX_C3_WATCHDOG is not set
-
-#
-# PCI-based Watchdog Cards
-#
-# CONFIG_PCIPCWATCHDOG is not set
-# CONFIG_WDTPCI is not set
-
-#
-# USB-based Watchdog Cards
-#
-# CONFIG_USBPCWATCHDOG is not set
-CONFIG_SSB_POSSIBLE=y
-
-#
-# Sonics Silicon Backplane
-#
-# CONFIG_SSB is not set
-
-#
-# Multifunction device drivers
-#
-# CONFIG_MFD_CORE is not set
-# CONFIG_MFD_SM501 is not set
-# CONFIG_HTC_PASIC3 is not set
-# CONFIG_TWL4030_CORE is not set
-# CONFIG_MFD_TMIO is not set
-# CONFIG_PMIC_DA903X is not set
-# CONFIG_MFD_WM8400 is not set
-# CONFIG_MFD_WM8350_I2C is not set
-# CONFIG_MFD_PCF50633 is not set
-# CONFIG_REGULATOR is not set
-
-#
-# Multimedia devices
-#
-
-#
-# Multimedia core support
-#
-# CONFIG_VIDEO_DEV is not set
-# CONFIG_DVB_CORE is not set
-# CONFIG_VIDEO_MEDIA is not set
-
-#
-# Multimedia drivers
-#
-CONFIG_DAB=y
-# CONFIG_USB_DABUSB is not set
-
-#
-# Graphics support
-#
 CONFIG_AGP=y
-# CONFIG_AGP_ALI is not set
-# CONFIG_AGP_ATI is not set
-# CONFIG_AGP_AMD is not set
 CONFIG_AGP_AMD64=y
 CONFIG_AGP_INTEL=y
-# CONFIG_AGP_NVIDIA is not set
-# CONFIG_AGP_SIS is not set
-# CONFIG_AGP_SWORKS is not set
-# CONFIG_AGP_VIA is not set
-# CONFIG_AGP_EFFICEON is not set
 CONFIG_DRM=y
-# CONFIG_DRM_TDFX is not set
-# CONFIG_DRM_R128 is not set
-# CONFIG_DRM_RADEON is not set
-# CONFIG_DRM_I810 is not set
-# CONFIG_DRM_I830 is not set
 CONFIG_DRM_I915=y
-# CONFIG_DRM_I915_KMS is not set
-# CONFIG_DRM_MGA is not set
-# CONFIG_DRM_SIS is not set
-# CONFIG_DRM_VIA is not set
-# CONFIG_DRM_SAVAGE is not set
-# CONFIG_VGASTATE is not set
-# CONFIG_VIDEO_OUTPUT_CONTROL is not set
-CONFIG_FB=y
-# CONFIG_FIRMWARE_EDID is not set
-# CONFIG_FB_DDC is not set
-# CONFIG_FB_BOOT_VESA_SUPPORT is not set
-CONFIG_FB_CFB_FILLRECT=y
-CONFIG_FB_CFB_COPYAREA=y
-CONFIG_FB_CFB_IMAGEBLIT=y
-# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
-# CONFIG_FB_SYS_FILLRECT is not set
-# CONFIG_FB_SYS_COPYAREA is not set
-# CONFIG_FB_SYS_IMAGEBLIT is not set
-# CONFIG_FB_FOREIGN_ENDIAN is not set
-# CONFIG_FB_SYS_FOPS is not set
-# CONFIG_FB_SVGALIB is not set
-# CONFIG_FB_MACMODES is not set
-# CONFIG_FB_BACKLIGHT is not set
 CONFIG_FB_MODE_HELPERS=y
 CONFIG_FB_TILEBLITTING=y
-
-#
-# Frame buffer hardware drivers
-#
-# CONFIG_FB_CIRRUS is not set
-# CONFIG_FB_PM2 is not set
-# CONFIG_FB_CYBER2000 is not set
-# CONFIG_FB_ARC is not set
-# CONFIG_FB_ASILIANT is not set
-# CONFIG_FB_IMSTT is not set
-# CONFIG_FB_VGA16 is not set
-# CONFIG_FB_UVESA is not set
-# CONFIG_FB_VESA is not set
 CONFIG_FB_EFI=y
-# CONFIG_FB_N411 is not set
-# CONFIG_FB_HGA is not set
-# CONFIG_FB_S1D13XXX is not set
-# CONFIG_FB_NVIDIA is not set
-# CONFIG_FB_RIVA is not set
-# CONFIG_FB_I810 is not set
-# CONFIG_FB_LE80578 is not set
-# CONFIG_FB_INTEL is not set
-# CONFIG_FB_MATROX is not set
-# CONFIG_FB_RADEON is not set
-# CONFIG_FB_ATY128 is not set
-# CONFIG_FB_ATY is not set
-# CONFIG_FB_S3 is not set
-# CONFIG_FB_SAVAGE is not set
-# CONFIG_FB_SIS is not set
-# CONFIG_FB_VIA is not set
-# CONFIG_FB_NEOMAGIC is not set
-# CONFIG_FB_KYRO is not set
-# CONFIG_FB_3DFX is not set
-# CONFIG_FB_VOODOO1 is not set
-# CONFIG_FB_VT8623 is not set
-# CONFIG_FB_TRIDENT is not set
-# CONFIG_FB_ARK is not set
-# CONFIG_FB_PM3 is not set
-# CONFIG_FB_CARMINE is not set
-# CONFIG_FB_GEODE is not set
-# CONFIG_FB_VIRTUAL is not set
-# CONFIG_FB_METRONOME is not set
-# CONFIG_FB_MB862XX is not set
-# CONFIG_FB_BROADSHEET is not set
 CONFIG_BACKLIGHT_LCD_SUPPORT=y
 # CONFIG_LCD_CLASS_DEVICE is not set
-CONFIG_BACKLIGHT_CLASS_DEVICE=y
-CONFIG_BACKLIGHT_GENERIC=y
-# CONFIG_BACKLIGHT_PROGEAR is not set
-# CONFIG_BACKLIGHT_MBP_NVIDIA is not set
-# CONFIG_BACKLIGHT_SAHARA is not set
-
-#
-# Display device support
-#
-# CONFIG_DISPLAY_SUPPORT is not set
-
-#
-# Console display driver support
-#
-CONFIG_VGA_CONSOLE=y
 CONFIG_VGACON_SOFT_SCROLLBACK=y
-CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=64
-CONFIG_DUMMY_CONSOLE=y
-# CONFIG_FRAMEBUFFER_CONSOLE is not set
 CONFIG_LOGO=y
 # CONFIG_LOGO_LINUX_MONO is not set
 # CONFIG_LOGO_LINUX_VGA16 is not set
-CONFIG_LOGO_LINUX_CLUT224=y
 CONFIG_SOUND=y
-CONFIG_SOUND_OSS_CORE=y
 CONFIG_SND=y
-CONFIG_SND_TIMER=y
-CONFIG_SND_PCM=y
-CONFIG_SND_HWDEP=y
-CONFIG_SND_JACK=y
 CONFIG_SND_SEQUENCER=y
 CONFIG_SND_SEQ_DUMMY=y
-CONFIG_SND_OSSEMUL=y
 CONFIG_SND_MIXER_OSS=y
 CONFIG_SND_PCM_OSS=y
-CONFIG_SND_PCM_OSS_PLUGINS=y
 CONFIG_SND_SEQUENCER_OSS=y
 CONFIG_SND_HRTIMER=y
-CONFIG_SND_SEQ_HRTIMER_DEFAULT=y
-CONFIG_SND_DYNAMIC_MINORS=y
-CONFIG_SND_SUPPORT_OLD_API=y
-CONFIG_SND_VERBOSE_PROCFS=y
-# CONFIG_SND_VERBOSE_PRINTK is not set
-# CONFIG_SND_DEBUG is not set
-CONFIG_SND_VMASTER=y
-CONFIG_SND_DRIVERS=y
-# CONFIG_SND_PCSP is not set
-# CONFIG_SND_DUMMY is not set
-# CONFIG_SND_VIRMIDI is not set
-# CONFIG_SND_MTPAV is not set
-# CONFIG_SND_SERIAL_U16550 is not set
-# CONFIG_SND_MPU401 is not set
-CONFIG_SND_PCI=y
-# CONFIG_SND_AD1889 is not set
-# CONFIG_SND_ALS300 is not set
-# CONFIG_SND_ALS4000 is not set
-# CONFIG_SND_ALI5451 is not set
-# CONFIG_SND_ATIIXP is not set
-# CONFIG_SND_ATIIXP_MODEM is not set
-# CONFIG_SND_AU8810 is not set
-# CONFIG_SND_AU8820 is not set
-# CONFIG_SND_AU8830 is not set
-# CONFIG_SND_AW2 is not set
-# CONFIG_SND_AZT3328 is not set
-# CONFIG_SND_BT87X is not set
-# CONFIG_SND_CA0106 is not set
-# CONFIG_SND_CMIPCI is not set
-# CONFIG_SND_OXYGEN is not set
-# CONFIG_SND_CS4281 is not set
-# CONFIG_SND_CS46XX is not set
-# CONFIG_SND_CS5530 is not set
-# CONFIG_SND_CS5535AUDIO is not set
-# CONFIG_SND_DARLA20 is not set
-# CONFIG_SND_GINA20 is not set
-# CONFIG_SND_LAYLA20 is not set
-# CONFIG_SND_DARLA24 is not set
-# CONFIG_SND_GINA24 is not set
-# CONFIG_SND_LAYLA24 is not set
-# CONFIG_SND_MONA is not set
-# CONFIG_SND_MIA is not set
-# CONFIG_SND_ECHO3G is not set
-# CONFIG_SND_INDIGO is not set
-# CONFIG_SND_INDIGOIO is not set
-# CONFIG_SND_INDIGODJ is not set
-# CONFIG_SND_INDIGOIOX is not set
-# CONFIG_SND_INDIGODJX is not set
-# CONFIG_SND_EMU10K1 is not set
-# CONFIG_SND_EMU10K1X is not set
-# CONFIG_SND_ENS1370 is not set
-# CONFIG_SND_ENS1371 is not set
-# CONFIG_SND_ES1938 is not set
-# CONFIG_SND_ES1968 is not set
-# CONFIG_SND_FM801 is not set
 CONFIG_SND_HDA_INTEL=y
 CONFIG_SND_HDA_HWDEP=y
-# CONFIG_SND_HDA_RECONFIG is not set
-# CONFIG_SND_HDA_INPUT_BEEP is not set
-CONFIG_SND_HDA_CODEC_REALTEK=y
-CONFIG_SND_HDA_CODEC_ANALOG=y
-CONFIG_SND_HDA_CODEC_SIGMATEL=y
-CONFIG_SND_HDA_CODEC_VIA=y
-CONFIG_SND_HDA_CODEC_ATIHDMI=y
-CONFIG_SND_HDA_CODEC_NVHDMI=y
-CONFIG_SND_HDA_CODEC_INTELHDMI=y
-CONFIG_SND_HDA_ELD=y
-CONFIG_SND_HDA_CODEC_CONEXANT=y
-CONFIG_SND_HDA_CODEC_CMEDIA=y
-CONFIG_SND_HDA_CODEC_SI3054=y
-CONFIG_SND_HDA_GENERIC=y
-# CONFIG_SND_HDA_POWER_SAVE is not set
-# CONFIG_SND_HDSP is not set
-# CONFIG_SND_HDSPM is not set
-# CONFIG_SND_HIFIER is not set
-# CONFIG_SND_ICE1712 is not set
-# CONFIG_SND_ICE1724 is not set
-# CONFIG_SND_INTEL8X0 is not set
-# CONFIG_SND_INTEL8X0M is not set
-# CONFIG_SND_KORG1212 is not set
-# CONFIG_SND_MAESTRO3 is not set
-# CONFIG_SND_MIXART is not set
-# CONFIG_SND_NM256 is not set
-# CONFIG_SND_PCXHR is not set
-# CONFIG_SND_RIPTIDE is not set
-# CONFIG_SND_RME32 is not set
-# CONFIG_SND_RME96 is not set
-# CONFIG_SND_RME9652 is not set
-# CONFIG_SND_SIS7019 is not set
-# CONFIG_SND_SONICVIBES is not set
-# CONFIG_SND_TRIDENT is not set
-# CONFIG_SND_VIA82XX is not set
-# CONFIG_SND_VIA82XX_MODEM is not set
-# CONFIG_SND_VIRTUOSO is not set
-# CONFIG_SND_VX222 is not set
-# CONFIG_SND_YMFPCI is not set
-CONFIG_SND_USB=y
-# CONFIG_SND_USB_AUDIO is not set
-# CONFIG_SND_USB_USX2Y is not set
-# CONFIG_SND_USB_CAIAQ is not set
-# CONFIG_SND_USB_US122L is not set
-CONFIG_SND_PCMCIA=y
-# CONFIG_SND_VXPOCKET is not set
-# CONFIG_SND_PDAUDIOCF is not set
-# CONFIG_SND_SOC is not set
-# CONFIG_SOUND_PRIME is not set
-CONFIG_HID_SUPPORT=y
-CONFIG_HID=y
-CONFIG_HID_DEBUG=y
 CONFIG_HIDRAW=y
-
-#
-# USB Input Devices
-#
-CONFIG_USB_HID=y
 CONFIG_HID_PID=y
 CONFIG_USB_HIDDEV=y
-
-#
-# Special HID drivers
-#
-CONFIG_HID_A4TECH=y
-CONFIG_HID_APPLE=y
-CONFIG_HID_BELKIN=y
-CONFIG_HID_CHERRY=y
-CONFIG_HID_CHICONY=y
-CONFIG_HID_CYPRESS=y
-# CONFIG_DRAGONRISE_FF is not set
-CONFIG_HID_EZKEY=y
-CONFIG_HID_KYE=y
 CONFIG_HID_GYRATION=y
-CONFIG_HID_KENSINGTON=y
-CONFIG_HID_LOGITECH=y
 CONFIG_LOGITECH_FF=y
-# CONFIG_LOGIRUMBLEPAD2_FF is not set
-CONFIG_HID_MICROSOFT=y
-CONFIG_HID_MONTEREY=y
 CONFIG_HID_NTRIG=y
 CONFIG_HID_PANTHERLORD=y
 CONFIG_PANTHERLORD_FF=y
@@ -1874,702 +249,92 @@ CONFIG_HID_PETALYNX=y
 CONFIG_HID_SAMSUNG=y
 CONFIG_HID_SONY=y
 CONFIG_HID_SUNPLUS=y
-# CONFIG_GREENASIA_FF is not set
 CONFIG_HID_TOPSEED=y
-CONFIG_THRUSTMASTER_FF=y
-CONFIG_ZEROPLUS_FF=y
-CONFIG_USB_SUPPORT=y
-CONFIG_USB_ARCH_HAS_HCD=y
-CONFIG_USB_ARCH_HAS_OHCI=y
-CONFIG_USB_ARCH_HAS_EHCI=y
 CONFIG_USB=y
 CONFIG_USB_DEBUG=y
 CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
-
-#
-# Miscellaneous USB options
-#
 CONFIG_USB_DEVICEFS=y
 # CONFIG_USB_DEVICE_CLASS is not set
-# CONFIG_USB_DYNAMIC_MINORS is not set
-CONFIG_USB_SUSPEND=y
-# CONFIG_USB_OTG is not set
 CONFIG_USB_MON=y
-# CONFIG_USB_WUSB is not set
-# CONFIG_USB_WUSB_CBAF is not set
-
-#
-# USB Host Controller Drivers
-#
-# CONFIG_USB_C67X00_HCD is not set
 CONFIG_USB_EHCI_HCD=y
-# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
 # CONFIG_USB_EHCI_TT_NEWSCHED is not set
-# CONFIG_USB_OXU210HP_HCD is not set
-# CONFIG_USB_ISP116X_HCD is not set
-# CONFIG_USB_ISP1760_HCD is not set
 CONFIG_USB_OHCI_HCD=y
-# CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set
-# CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set
-CONFIG_USB_OHCI_LITTLE_ENDIAN=y
 CONFIG_USB_UHCI_HCD=y
-# CONFIG_USB_SL811_HCD is not set
-# CONFIG_USB_R8A66597_HCD is not set
-# CONFIG_USB_WHCI_HCD is not set
-# CONFIG_USB_HWA_HCD is not set
-
-#
-# USB Device Class drivers
-#
-# CONFIG_USB_ACM is not set
 CONFIG_USB_PRINTER=y
-# CONFIG_USB_WDM is not set
-# CONFIG_USB_TMC is not set
-
-#
-# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
-#
-
-#
-# also be needed; see USB_STORAGE Help for more info
-#
 CONFIG_USB_STORAGE=y
-# CONFIG_USB_STORAGE_DEBUG is not set
-# CONFIG_USB_STORAGE_DATAFAB is not set
-# CONFIG_USB_STORAGE_FREECOM is not set
-# CONFIG_USB_STORAGE_ISD200 is not set
-# CONFIG_USB_STORAGE_USBAT is not set
-# CONFIG_USB_STORAGE_SDDR09 is not set
-# CONFIG_USB_STORAGE_SDDR55 is not set
-# CONFIG_USB_STORAGE_JUMPSHOT is not set
-# CONFIG_USB_STORAGE_ALAUDA is not set
-# CONFIG_USB_STORAGE_ONETOUCH is not set
-# CONFIG_USB_STORAGE_KARMA is not set
-# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set
 CONFIG_USB_LIBUSUAL=y
-
-#
-# USB Imaging devices
-#
-# CONFIG_USB_MDC800 is not set
-# CONFIG_USB_MICROTEK is not set
-
-#
-# USB port drivers
-#
-# CONFIG_USB_SERIAL is not set
-
-#
-# USB Miscellaneous drivers
-#
-# CONFIG_USB_EMI62 is not set
-# CONFIG_USB_EMI26 is not set
-# CONFIG_USB_ADUTUX is not set
-# CONFIG_USB_SEVSEG is not set
-# CONFIG_USB_RIO500 is not set
-# CONFIG_USB_LEGOTOWER is not set
-# CONFIG_USB_LCD is not set
-# CONFIG_USB_BERRY_CHARGE is not set
-# CONFIG_USB_LED is not set
-# CONFIG_USB_CYPRESS_CY7C63 is not set
-# CONFIG_USB_CYTHERM is not set
-# CONFIG_USB_IDMOUSE is not set
-# CONFIG_USB_FTDI_ELAN is not set
-# CONFIG_USB_APPLEDISPLAY is not set
-# CONFIG_USB_SISUSBVGA is not set
-# CONFIG_USB_LD is not set
-# CONFIG_USB_TRANCEVIBRATOR is not set
-# CONFIG_USB_IOWARRIOR is not set
-# CONFIG_USB_TEST is not set
-# CONFIG_USB_ISIGHTFW is not set
-# CONFIG_USB_VST is not set
-# CONFIG_USB_GADGET is not set
-
-#
-# OTG and related infrastructure
-#
-# CONFIG_NOP_USB_XCEIV is not set
-# CONFIG_UWB is not set
-# CONFIG_MMC is not set
-# CONFIG_MEMSTICK is not set
-CONFIG_NEW_LEDS=y
-CONFIG_LEDS_CLASS=y
-
-#
-# LED drivers
-#
-# CONFIG_LEDS_ALIX2 is not set
-# CONFIG_LEDS_PCA9532 is not set
-# CONFIG_LEDS_LP5521 is not set
-# CONFIG_LEDS_CLEVO_MAIL is not set
-# CONFIG_LEDS_PCA955X is not set
-# CONFIG_LEDS_BD2802 is not set
-
-#
-# LED Triggers
-#
-CONFIG_LEDS_TRIGGERS=y
-# CONFIG_LEDS_TRIGGER_TIMER is not set
-# CONFIG_LEDS_TRIGGER_HEARTBEAT is not set
-# CONFIG_LEDS_TRIGGER_BACKLIGHT is not set
-# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set
-
-#
-# iptables trigger is under Netfilter config (LED target)
-#
-# CONFIG_ACCESSIBILITY is not set
-# CONFIG_INFINIBAND is not set
 CONFIG_EDAC=y
-
-#
-# Reporting subsystems
-#
-# CONFIG_EDAC_DEBUG is not set
-# CONFIG_EDAC_MM_EDAC is not set
-CONFIG_RTC_LIB=y
 CONFIG_RTC_CLASS=y
 # CONFIG_RTC_HCTOSYS is not set
-# CONFIG_RTC_DEBUG is not set
-
-#
-# RTC interfaces
-#
-CONFIG_RTC_INTF_SYSFS=y
-CONFIG_RTC_INTF_PROC=y
-CONFIG_RTC_INTF_DEV=y
-# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set
-# CONFIG_RTC_DRV_TEST is not set
-
-#
-# I2C RTC drivers
-#
-# CONFIG_RTC_DRV_DS1307 is not set
-# CONFIG_RTC_DRV_DS1374 is not set
-# CONFIG_RTC_DRV_DS1672 is not set
-# CONFIG_RTC_DRV_MAX6900 is not set
-# CONFIG_RTC_DRV_RS5C372 is not set
-# CONFIG_RTC_DRV_ISL1208 is not set
-# CONFIG_RTC_DRV_X1205 is not set
-# CONFIG_RTC_DRV_PCF8563 is not set
-# CONFIG_RTC_DRV_PCF8583 is not set
-# CONFIG_RTC_DRV_M41T80 is not set
-# CONFIG_RTC_DRV_S35390A is not set
-# CONFIG_RTC_DRV_FM3130 is not set
-# CONFIG_RTC_DRV_RX8581 is not set
-
-#
-# SPI RTC drivers
-#
-
-#
-# Platform RTC drivers
-#
-CONFIG_RTC_DRV_CMOS=y
-# CONFIG_RTC_DRV_DS1286 is not set
-# CONFIG_RTC_DRV_DS1511 is not set
-# CONFIG_RTC_DRV_DS1553 is not set
-# CONFIG_RTC_DRV_DS1742 is not set
-# CONFIG_RTC_DRV_STK17TA8 is not set
-# CONFIG_RTC_DRV_M48T86 is not set
-# CONFIG_RTC_DRV_M48T35 is not set
-# CONFIG_RTC_DRV_M48T59 is not set
-# CONFIG_RTC_DRV_BQ4802 is not set
-# CONFIG_RTC_DRV_V3020 is not set
-
-#
-# on-CPU RTC drivers
-#
 CONFIG_DMADEVICES=y
-
-#
-# DMA Devices
-#
-# CONFIG_INTEL_IOATDMA is not set
-# CONFIG_AUXDISPLAY is not set
-# CONFIG_UIO is not set
-# CONFIG_STAGING is not set
-CONFIG_X86_PLATFORM_DEVICES=y
-# CONFIG_ACER_WMI is not set
-# CONFIG_ASUS_LAPTOP is not set
-# CONFIG_FUJITSU_LAPTOP is not set
-# CONFIG_TC1100_WMI is not set
-# CONFIG_MSI_LAPTOP is not set
-# CONFIG_PANASONIC_LAPTOP is not set
-# CONFIG_COMPAL_LAPTOP is not set
-# CONFIG_SONY_LAPTOP is not set
-# CONFIG_THINKPAD_ACPI is not set
-# CONFIG_INTEL_MENLOW is not set
 CONFIG_EEEPC_LAPTOP=y
-# CONFIG_ACPI_WMI is not set
-# CONFIG_ACPI_ASUS is not set
-# CONFIG_ACPI_TOSHIBA is not set
-
-#
-# Firmware Drivers
-#
-# CONFIG_EDD is not set
-CONFIG_FIRMWARE_MEMMAP=y
 CONFIG_EFI_VARS=y
-# CONFIG_DELL_RBU is not set
-# CONFIG_DCDBAS is not set
-CONFIG_DMIID=y
-# CONFIG_ISCSI_IBFT_FIND is not set
-
-#
-# File systems
-#
-# CONFIG_EXT2_FS is not set
 CONFIG_EXT3_FS=y
 # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_XATTR=y
 CONFIG_EXT3_FS_POSIX_ACL=y
 CONFIG_EXT3_FS_SECURITY=y
-# CONFIG_EXT4_FS is not set
-CONFIG_JBD=y
-# CONFIG_JBD_DEBUG is not set
-CONFIG_FS_MBCACHE=y
-# CONFIG_REISERFS_FS is not set
-# CONFIG_JFS_FS is not set
-CONFIG_FS_POSIX_ACL=y
-CONFIG_FILE_LOCKING=y
-# CONFIG_XFS_FS is not set
-# CONFIG_OCFS2_FS is not set
-# CONFIG_BTRFS_FS is not set
-CONFIG_DNOTIFY=y
-CONFIG_INOTIFY=y
-CONFIG_INOTIFY_USER=y
 CONFIG_QUOTA=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 # CONFIG_PRINT_QUOTA_WARNING is not set
-CONFIG_QUOTA_TREE=y
-# CONFIG_QFMT_V1 is not set
 CONFIG_QFMT_V2=y
-CONFIG_QUOTACTL=y
-# CONFIG_AUTOFS_FS is not set
 CONFIG_AUTOFS4_FS=y
-# CONFIG_FUSE_FS is not set
-CONFIG_GENERIC_ACL=y
-
-#
-# Caches
-#
-# CONFIG_FSCACHE is not set
-
-#
-# CD-ROM/DVD Filesystems
-#
 CONFIG_ISO9660_FS=y
 CONFIG_JOLIET=y
 CONFIG_ZISOFS=y
-# CONFIG_UDF_FS is not set
-
-#
-# DOS/FAT/NT Filesystems
-#
-CONFIG_FAT_FS=y
 CONFIG_MSDOS_FS=y
 CONFIG_VFAT_FS=y
-CONFIG_FAT_DEFAULT_CODEPAGE=437
-CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
-# CONFIG_NTFS_FS is not set
-
-#
-# Pseudo filesystems
-#
-CONFIG_PROC_FS=y
 CONFIG_PROC_KCORE=y
-CONFIG_PROC_VMCORE=y
-CONFIG_PROC_SYSCTL=y
-CONFIG_PROC_PAGE_MONITOR=y
-CONFIG_SYSFS=y
 CONFIG_TMPFS=y
 CONFIG_TMPFS_POSIX_ACL=y
 CONFIG_HUGETLBFS=y
-CONFIG_HUGETLB_PAGE=y
-# CONFIG_CONFIGFS_FS is not set
-CONFIG_MISC_FILESYSTEMS=y
-# CONFIG_ADFS_FS is not set
-# CONFIG_AFFS_FS is not set
-# CONFIG_ECRYPT_FS is not set
-# CONFIG_HFS_FS is not set
-# CONFIG_HFSPLUS_FS is not set
-# CONFIG_BEFS_FS is not set
-# CONFIG_BFS_FS is not set
-# CONFIG_EFS_FS is not set
-# CONFIG_CRAMFS is not set
-# CONFIG_SQUASHFS is not set
-# CONFIG_VXFS_FS is not set
-# CONFIG_MINIX_FS is not set
-# CONFIG_OMFS_FS is not set
-# CONFIG_HPFS_FS is not set
-# CONFIG_QNX4FS_FS is not set
-# CONFIG_ROMFS_FS is not set
-# CONFIG_SYSV_FS is not set
-# CONFIG_UFS_FS is not set
-# CONFIG_NILFS2_FS is not set
-CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
 CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_V4=y
 CONFIG_ROOT_NFS=y
-# CONFIG_NFSD is not set
-CONFIG_LOCKD=y
-CONFIG_LOCKD_V4=y
-CONFIG_NFS_ACL_SUPPORT=y
-CONFIG_NFS_COMMON=y
-CONFIG_SUNRPC=y
-CONFIG_SUNRPC_GSS=y
-CONFIG_RPCSEC_GSS_KRB5=y
-# CONFIG_RPCSEC_GSS_SPKM3 is not set
-# CONFIG_SMB_FS is not set
-# CONFIG_CIFS is not set
-# CONFIG_NCP_FS is not set
-# CONFIG_CODA_FS is not set
-# CONFIG_AFS_FS is not set
-
-#
-# Partition Types
-#
 CONFIG_PARTITION_ADVANCED=y
-# CONFIG_ACORN_PARTITION is not set
 CONFIG_OSF_PARTITION=y
 CONFIG_AMIGA_PARTITION=y
-# CONFIG_ATARI_PARTITION is not set
 CONFIG_MAC_PARTITION=y
-CONFIG_MSDOS_PARTITION=y
 CONFIG_BSD_DISKLABEL=y
 CONFIG_MINIX_SUBPARTITION=y
 CONFIG_SOLARIS_X86_PARTITION=y
 CONFIG_UNIXWARE_DISKLABEL=y
-# CONFIG_LDM_PARTITION is not set
 CONFIG_SGI_PARTITION=y
-# CONFIG_ULTRIX_PARTITION is not set
 CONFIG_SUN_PARTITION=y
 CONFIG_KARMA_PARTITION=y
 CONFIG_EFI_PARTITION=y
-# CONFIG_SYSV68_PARTITION is not set
-CONFIG_NLS=y
 CONFIG_NLS_DEFAULT="utf8"
 CONFIG_NLS_CODEPAGE_437=y
-# CONFIG_NLS_CODEPAGE_737 is not set
-# CONFIG_NLS_CODEPAGE_775 is not set
-# CONFIG_NLS_CODEPAGE_850 is not set
-# CONFIG_NLS_CODEPAGE_852 is not set
-# CONFIG_NLS_CODEPAGE_855 is not set
-# CONFIG_NLS_CODEPAGE_857 is not set
-# CONFIG_NLS_CODEPAGE_860 is not set
-# CONFIG_NLS_CODEPAGE_861 is not set
-# CONFIG_NLS_CODEPAGE_862 is not set
-# CONFIG_NLS_CODEPAGE_863 is not set
-# CONFIG_NLS_CODEPAGE_864 is not set
-# CONFIG_NLS_CODEPAGE_865 is not set
-# CONFIG_NLS_CODEPAGE_866 is not set
-# CONFIG_NLS_CODEPAGE_869 is not set
-# CONFIG_NLS_CODEPAGE_936 is not set
-# CONFIG_NLS_CODEPAGE_950 is not set
-# CONFIG_NLS_CODEPAGE_932 is not set
-# CONFIG_NLS_CODEPAGE_949 is not set
-# CONFIG_NLS_CODEPAGE_874 is not set
-# CONFIG_NLS_ISO8859_8 is not set
-# CONFIG_NLS_CODEPAGE_1250 is not set
-# CONFIG_NLS_CODEPAGE_1251 is not set
 CONFIG_NLS_ASCII=y
 CONFIG_NLS_ISO8859_1=y
-# CONFIG_NLS_ISO8859_2 is not set
-# CONFIG_NLS_ISO8859_3 is not set
-# CONFIG_NLS_ISO8859_4 is not set
-# CONFIG_NLS_ISO8859_5 is not set
-# CONFIG_NLS_ISO8859_6 is not set
-# CONFIG_NLS_ISO8859_7 is not set
-# CONFIG_NLS_ISO8859_9 is not set
-# CONFIG_NLS_ISO8859_13 is not set
-# CONFIG_NLS_ISO8859_14 is not set
-# CONFIG_NLS_ISO8859_15 is not set
-# CONFIG_NLS_KOI8_R is not set
-# CONFIG_NLS_KOI8_U is not set
 CONFIG_NLS_UTF8=y
-# CONFIG_DLM is not set
-
-#
-# Kernel hacking
-#
-CONFIG_TRACE_IRQFLAGS_SUPPORT=y
 CONFIG_PRINTK_TIME=y
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
-CONFIG_ENABLE_MUST_CHECK=y
 CONFIG_FRAME_WARN=2048
 CONFIG_MAGIC_SYSRQ=y
 # CONFIG_UNUSED_SYMBOLS is not set
-CONFIG_DEBUG_FS=y
-# CONFIG_HEADERS_CHECK is not set
 CONFIG_DEBUG_KERNEL=y
-# CONFIG_DEBUG_SHIRQ is not set
-# CONFIG_DETECT_SOFTLOCKUP is not set
-# CONFIG_DETECT_HUNG_TASK is not set
 # CONFIG_SCHED_DEBUG is not set
 CONFIG_SCHEDSTATS=y
 CONFIG_TIMER_STATS=y
-# CONFIG_DEBUG_OBJECTS is not set
-# CONFIG_SLUB_DEBUG_ON is not set
-# CONFIG_SLUB_STATS is not set
-# CONFIG_DEBUG_RT_MUTEXES is not set
-# CONFIG_RT_MUTEX_TESTER is not set
-# CONFIG_DEBUG_SPINLOCK is not set
-# CONFIG_DEBUG_MUTEXES is not set
-# CONFIG_DEBUG_LOCK_ALLOC is not set
-# CONFIG_PROVE_LOCKING is not set
-# CONFIG_LOCK_STAT is not set
-# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
-# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
-CONFIG_STACKTRACE=y
-# CONFIG_DEBUG_KOBJECT is not set
-# CONFIG_DEBUG_HIGHMEM is not set
-CONFIG_DEBUG_BUGVERBOSE=y
-# CONFIG_DEBUG_INFO is not set
-# CONFIG_DEBUG_VM is not set
-# CONFIG_DEBUG_VIRTUAL is not set
-# CONFIG_DEBUG_WRITECOUNT is not set
-CONFIG_DEBUG_MEMORY_INIT=y
-# CONFIG_DEBUG_LIST is not set
-# CONFIG_DEBUG_SG is not set
-# CONFIG_DEBUG_NOTIFIERS is not set
-CONFIG_ARCH_WANT_FRAME_POINTERS=y
-CONFIG_FRAME_POINTER=y
-# CONFIG_BOOT_PRINTK_DELAY is not set
-# CONFIG_RCU_TORTURE_TEST is not set
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
-# CONFIG_KPROBES_SANITY_TEST is not set
-# CONFIG_BACKTRACE_SELF_TEST is not set
-# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
-# CONFIG_LKDTM is not set
-# CONFIG_FAULT_INJECTION is not set
-# CONFIG_LATENCYTOP is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
-# CONFIG_DEBUG_PAGEALLOC is not set
-CONFIG_USER_STACKTRACE_SUPPORT=y
-CONFIG_NOP_TRACER=y
-CONFIG_HAVE_FUNCTION_TRACER=y
-CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
-CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y
-CONFIG_HAVE_DYNAMIC_FTRACE=y
-CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
-CONFIG_HAVE_HW_BRANCH_TRACER=y
-CONFIG_HAVE_SYSCALL_TRACEPOINTS=y
-CONFIG_RING_BUFFER=y
-CONFIG_TRACING=y
-CONFIG_TRACING_SUPPORT=y
-
-#
-# Tracers
-#
-# CONFIG_FUNCTION_TRACER is not set
-# CONFIG_IRQSOFF_TRACER is not set
-# CONFIG_SYSPROF_TRACER is not set
-# CONFIG_SCHED_TRACER is not set
-# CONFIG_CONTEXT_SWITCH_TRACER is not set
-# CONFIG_EVENT_TRACER is not set
-# CONFIG_FTRACE_SYSCALLS is not set
-# CONFIG_BOOT_TRACER is not set
-# CONFIG_TRACE_BRANCH_PROFILING is not set
-# CONFIG_POWER_TRACER is not set
-# CONFIG_STACK_TRACER is not set
-# CONFIG_HW_BRANCH_TRACER is not set
-# CONFIG_KMEMTRACE is not set
-# CONFIG_WORKQUEUE_TRACER is not set
 CONFIG_BLK_DEV_IO_TRACE=y
-# CONFIG_FTRACE_STARTUP_TEST is not set
-# CONFIG_MMIOTRACE is not set
 CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
-# CONFIG_DYNAMIC_DEBUG is not set
-# CONFIG_DMA_API_DEBUG is not set
-# CONFIG_SAMPLES is not set
-CONFIG_HAVE_ARCH_KGDB=y
-# CONFIG_KGDB is not set
-# CONFIG_STRICT_DEVMEM is not set
-CONFIG_X86_VERBOSE_BOOTUP=y
-CONFIG_EARLY_PRINTK=y
 CONFIG_EARLY_PRINTK_DBGP=y
 CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_DEBUG_STACK_USAGE=y
-# CONFIG_DEBUG_PER_CPU_MAPS is not set
-# CONFIG_X86_PTDUMP is not set
-CONFIG_DEBUG_RODATA=y
 # CONFIG_DEBUG_RODATA_TEST is not set
 CONFIG_DEBUG_NX_TEST=m
-# CONFIG_4KSTACKS is not set
-CONFIG_DOUBLEFAULT=y
-CONFIG_HAVE_MMIOTRACE_SUPPORT=y
-CONFIG_IO_DELAY_TYPE_0X80=0
-CONFIG_IO_DELAY_TYPE_0XED=1
-CONFIG_IO_DELAY_TYPE_UDELAY=2
-CONFIG_IO_DELAY_TYPE_NONE=3
-CONFIG_IO_DELAY_0X80=y
-# CONFIG_IO_DELAY_0XED is not set
-# CONFIG_IO_DELAY_UDELAY is not set
-# CONFIG_IO_DELAY_NONE is not set
-CONFIG_DEFAULT_IO_DELAY_TYPE=0
 CONFIG_DEBUG_BOOT_PARAMS=y
-# CONFIG_CPA_DEBUG is not set
 CONFIG_OPTIMIZE_INLINING=y
-
-#
-# Security options
-#
-CONFIG_KEYS=y
 CONFIG_KEYS_DEBUG_PROC_KEYS=y
 CONFIG_SECURITY=y
-# CONFIG_SECURITYFS is not set
 CONFIG_SECURITY_NETWORK=y
-# CONFIG_SECURITY_NETWORK_XFRM is not set
-# CONFIG_SECURITY_PATH is not set
-CONFIG_SECURITY_FILE_CAPABILITIES=y
-# CONFIG_SECURITY_ROOTPLUG is not set
-CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=65536
 CONFIG_SECURITY_SELINUX=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM=y
-CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=1
 CONFIG_SECURITY_SELINUX_DISABLE=y
-CONFIG_SECURITY_SELINUX_DEVELOP=y
-CONFIG_SECURITY_SELINUX_AVC_STATS=y
-CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1
-# CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set
-# CONFIG_SECURITY_SMACK is not set
-# CONFIG_SECURITY_TOMOYO is not set
-# CONFIG_IMA is not set
-CONFIG_CRYPTO=y
-
-#
-# Crypto core or helper
-#
-# CONFIG_CRYPTO_FIPS is not set
-CONFIG_CRYPTO_ALGAPI=y
-CONFIG_CRYPTO_ALGAPI2=y
-CONFIG_CRYPTO_AEAD=y
-CONFIG_CRYPTO_AEAD2=y
-CONFIG_CRYPTO_BLKCIPHER=y
-CONFIG_CRYPTO_BLKCIPHER2=y
-CONFIG_CRYPTO_HASH=y
-CONFIG_CRYPTO_HASH2=y
-CONFIG_CRYPTO_RNG2=y
-CONFIG_CRYPTO_PCOMP=y
-CONFIG_CRYPTO_MANAGER=y
-CONFIG_CRYPTO_MANAGER2=y
-# CONFIG_CRYPTO_GF128MUL is not set
-# CONFIG_CRYPTO_NULL is not set
-CONFIG_CRYPTO_WORKQUEUE=y
-# CONFIG_CRYPTO_CRYPTD is not set
-CONFIG_CRYPTO_AUTHENC=y
-# CONFIG_CRYPTO_TEST is not set
-
-#
-# Authenticated Encryption with Associated Data
-#
-# CONFIG_CRYPTO_CCM is not set
-# CONFIG_CRYPTO_GCM is not set
-# CONFIG_CRYPTO_SEQIV is not set
-
-#
-# Block modes
-#
-CONFIG_CRYPTO_CBC=y
-# CONFIG_CRYPTO_CTR is not set
-# CONFIG_CRYPTO_CTS is not set
-CONFIG_CRYPTO_ECB=y
-# CONFIG_CRYPTO_LRW is not set
-# CONFIG_CRYPTO_PCBC is not set
-# CONFIG_CRYPTO_XTS is not set
-
-#
-# Hash modes
-#
-CONFIG_CRYPTO_HMAC=y
-# CONFIG_CRYPTO_XCBC is not set
-
-#
-# Digest
-#
-# CONFIG_CRYPTO_CRC32C is not set
-# CONFIG_CRYPTO_CRC32C_INTEL is not set
-# CONFIG_CRYPTO_MD4 is not set
-CONFIG_CRYPTO_MD5=y
-# CONFIG_CRYPTO_MICHAEL_MIC is not set
-# CONFIG_CRYPTO_RMD128 is not set
-# CONFIG_CRYPTO_RMD160 is not set
-# CONFIG_CRYPTO_RMD256 is not set
-# CONFIG_CRYPTO_RMD320 is not set
-CONFIG_CRYPTO_SHA1=y
-# CONFIG_CRYPTO_SHA256 is not set
-# CONFIG_CRYPTO_SHA512 is not set
-# CONFIG_CRYPTO_TGR192 is not set
-# CONFIG_CRYPTO_WP512 is not set
-
-#
-# Ciphers
-#
-CONFIG_CRYPTO_AES=y
 CONFIG_CRYPTO_AES_586=y
-# CONFIG_CRYPTO_ANUBIS is not set
-CONFIG_CRYPTO_ARC4=y
-# CONFIG_CRYPTO_BLOWFISH is not set
-# CONFIG_CRYPTO_CAMELLIA is not set
-# CONFIG_CRYPTO_CAST5 is not set
-# CONFIG_CRYPTO_CAST6 is not set
-CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_FCRYPT is not set
-# CONFIG_CRYPTO_KHAZAD is not set
-# CONFIG_CRYPTO_SALSA20 is not set
-# CONFIG_CRYPTO_SALSA20_586 is not set
-# CONFIG_CRYPTO_SEED is not set
-# CONFIG_CRYPTO_SERPENT is not set
-# CONFIG_CRYPTO_TEA is not set
-# CONFIG_CRYPTO_TWOFISH is not set
-# CONFIG_CRYPTO_TWOFISH_586 is not set
-
-#
-# Compression
-#
-# CONFIG_CRYPTO_DEFLATE is not set
-# CONFIG_CRYPTO_ZLIB is not set
-# CONFIG_CRYPTO_LZO is not set
-
-#
-# Random Number Generation
-#
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_HW=y
-# CONFIG_CRYPTO_DEV_PADLOCK is not set
-# CONFIG_CRYPTO_DEV_GEODE is not set
-# CONFIG_CRYPTO_DEV_HIFN_795X is not set
-CONFIG_HAVE_KVM=y
-CONFIG_HAVE_KVM_IRQCHIP=y
-CONFIG_VIRTUALIZATION=y
-# CONFIG_KVM is not set
-# CONFIG_LGUEST is not set
-# CONFIG_VIRTIO_PCI is not set
-# CONFIG_VIRTIO_BALLOON is not set
-CONFIG_BINARY_PRINTF=y
-
-#
-# Library routines
-#
-CONFIG_BITREVERSE=y
-CONFIG_GENERIC_FIND_FIRST_BIT=y
-CONFIG_GENERIC_FIND_NEXT_BIT=y
-CONFIG_GENERIC_FIND_LAST_BIT=y
-# CONFIG_CRC_CCITT is not set
-# CONFIG_CRC16 is not set
 CONFIG_CRC_T10DIF=y
-# CONFIG_CRC_ITU_T is not set
-CONFIG_CRC32=y
-# CONFIG_CRC7 is not set
-# CONFIG_LIBCRC32C is not set
-CONFIG_AUDIT_GENERIC=y
-CONFIG_ZLIB_INFLATE=y
-CONFIG_DECOMPRESS_GZIP=y
-CONFIG_DECOMPRESS_BZIP2=y
-CONFIG_DECOMPRESS_LZMA=y
-CONFIG_HAS_IOMEM=y
-CONFIG_HAS_IOPORT=y
-CONFIG_HAS_DMA=y
-CONFIG_NLATTR=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 6c86acd847a4..ee01a9d5d4f0 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1,519 +1,89 @@
-#
-# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.30-rc2
-# Mon May 11 16:22:00 2009
-#
 CONFIG_64BIT=y
-# CONFIG_X86_32 is not set
-CONFIG_X86_64=y
-CONFIG_X86=y
-CONFIG_OUTPUT_FORMAT="elf64-x86-64"
-CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig"
-CONFIG_GENERIC_TIME=y
-CONFIG_GENERIC_CMOS_UPDATE=y
-CONFIG_CLOCKSOURCE_WATCHDOG=y
-CONFIG_GENERIC_CLOCKEVENTS=y
-CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y
-CONFIG_LOCKDEP_SUPPORT=y
-CONFIG_STACKTRACE_SUPPORT=y
-CONFIG_HAVE_LATENCYTOP_SUPPORT=y
-CONFIG_FAST_CMPXCHG_LOCAL=y
-CONFIG_MMU=y
-CONFIG_ZONE_DMA=y
-CONFIG_GENERIC_ISA_DMA=y
-CONFIG_GENERIC_IOMAP=y
-CONFIG_GENERIC_BUG=y
-CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y
-CONFIG_GENERIC_HWEIGHT=y
-CONFIG_ARCH_MAY_HAVE_PC_FDC=y
-CONFIG_RWSEM_GENERIC_SPINLOCK=y
-# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set
-CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y
-CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_GENERIC_TIME_VSYSCALL=y
-CONFIG_ARCH_HAS_CPU_RELAX=y
-CONFIG_ARCH_HAS_DEFAULT_IDLE=y
-CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
-CONFIG_HAVE_SETUP_PER_CPU_AREA=y
-CONFIG_HAVE_DYNAMIC_PER_CPU_AREA=y
-CONFIG_HAVE_CPUMASK_OF_CPU_MAP=y
-CONFIG_ARCH_HIBERNATION_POSSIBLE=y
-CONFIG_ARCH_SUSPEND_POSSIBLE=y
-CONFIG_ZONE_DMA32=y
-CONFIG_ARCH_POPULATES_NODE_MAP=y
-CONFIG_AUDIT_ARCH=y
-CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
-CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
-CONFIG_GENERIC_HARDIRQS=y
-CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
-CONFIG_GENERIC_IRQ_PROBE=y
-CONFIG_GENERIC_PENDING_IRQ=y
-CONFIG_USE_GENERIC_SMP_HELPERS=y
-CONFIG_X86_64_SMP=y
-CONFIG_X86_HT=y
-CONFIG_X86_TRAMPOLINE=y
-# CONFIG_KTIME_SCALAR is not set
-CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
-
-#
-# General setup
-#
 CONFIG_EXPERIMENTAL=y
-CONFIG_LOCK_KERNEL=y
-CONFIG_INIT_ENV_ARG_LIMIT=32
-CONFIG_LOCALVERSION=""
 # CONFIG_LOCALVERSION_AUTO is not set
-CONFIG_HAVE_KERNEL_GZIP=y
-CONFIG_HAVE_KERNEL_BZIP2=y
-CONFIG_HAVE_KERNEL_LZMA=y
-CONFIG_KERNEL_GZIP=y
-# CONFIG_KERNEL_BZIP2 is not set
-# CONFIG_KERNEL_LZMA is not set
-CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
-CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
-CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
-# CONFIG_BSD_PROCESS_ACCT_V3 is not set
 CONFIG_TASKSTATS=y
 CONFIG_TASK_DELAY_ACCT=y
 CONFIG_TASK_XACCT=y
 CONFIG_TASK_IO_ACCOUNTING=y
 CONFIG_AUDIT=y
-CONFIG_AUDITSYSCALL=y
-CONFIG_AUDIT_TREE=y
-
-#
-# RCU Subsystem
-#
-# CONFIG_CLASSIC_RCU is not set
-CONFIG_TREE_RCU=y
-# CONFIG_PREEMPT_RCU is not set
-# CONFIG_RCU_TRACE is not set
-CONFIG_RCU_FANOUT=64
-# CONFIG_RCU_FANOUT_EXACT is not set
-# CONFIG_TREE_RCU_TRACE is not set
-# CONFIG_PREEMPT_RCU_TRACE is not set
-# CONFIG_IKCONFIG is not set
 CONFIG_LOG_BUF_SHIFT=18
-CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y
-CONFIG_GROUP_SCHED=y
-CONFIG_FAIR_GROUP_SCHED=y
-# CONFIG_RT_GROUP_SCHED is not set
-# CONFIG_USER_SCHED is not set
-CONFIG_CGROUP_SCHED=y
 CONFIG_CGROUPS=y
-# CONFIG_CGROUP_DEBUG is not set
 CONFIG_CGROUP_NS=y
 CONFIG_CGROUP_FREEZER=y
-# CONFIG_CGROUP_DEVICE is not set
 CONFIG_CPUSETS=y
-CONFIG_PROC_PID_CPUSET=y
 CONFIG_CGROUP_CPUACCT=y
 CONFIG_RESOURCE_COUNTERS=y
-# CONFIG_CGROUP_MEM_RES_CTLR is not set
-# CONFIG_SYSFS_DEPRECATED_V2 is not set
-CONFIG_RELAY=y
-CONFIG_NAMESPACES=y
+CONFIG_CGROUP_SCHED=y
 CONFIG_UTS_NS=y
 CONFIG_IPC_NS=y
 CONFIG_USER_NS=y
 CONFIG_PID_NS=y
 CONFIG_NET_NS=y
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE=""
-CONFIG_RD_GZIP=y
-CONFIG_RD_BZIP2=y
-CONFIG_RD_LZMA=y
-CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_SYSCTL=y
-CONFIG_ANON_INODES=y
-# CONFIG_EMBEDDED is not set
-CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
-CONFIG_KALLSYMS=y
-CONFIG_KALLSYMS_ALL=y
 CONFIG_KALLSYMS_EXTRA_PASS=y
-# CONFIG_STRIP_ASM_SYMS is not set
-CONFIG_HOTPLUG=y
-CONFIG_PRINTK=y
-CONFIG_BUG=y
-CONFIG_ELF_CORE=y
-CONFIG_PCSPKR_PLATFORM=y
-CONFIG_BASE_FULL=y
-CONFIG_FUTEX=y
-CONFIG_EPOLL=y
-CONFIG_SIGNALFD=y
-CONFIG_TIMERFD=y
-CONFIG_EVENTFD=y
-CONFIG_SHMEM=y
-CONFIG_AIO=y
-CONFIG_VM_EVENT_COUNTERS=y
-CONFIG_PCI_QUIRKS=y
-CONFIG_SLUB_DEBUG=y
 # CONFIG_COMPAT_BRK is not set
-# CONFIG_SLAB is not set
-CONFIG_SLUB=y
-# CONFIG_SLOB is not set
 CONFIG_PROFILING=y
-CONFIG_TRACEPOINTS=y
-CONFIG_MARKERS=y
-# CONFIG_OPROFILE is not set
-CONFIG_HAVE_OPROFILE=y
 CONFIG_KPROBES=y
-CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y
-CONFIG_KRETPROBES=y
-CONFIG_HAVE_IOREMAP_PROT=y
-CONFIG_HAVE_KPROBES=y
-CONFIG_HAVE_KRETPROBES=y
-CONFIG_HAVE_ARCH_TRACEHOOK=y
-CONFIG_HAVE_DMA_API_DEBUG=y
-# CONFIG_SLOW_WORK is not set
-# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
-CONFIG_SLABINFO=y
-CONFIG_RT_MUTEXES=y
-CONFIG_BASE_SMALL=0
 CONFIG_MODULES=y
-# CONFIG_MODULE_FORCE_LOAD is not set
 CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
-# CONFIG_MODVERSIONS is not set
-# CONFIG_MODULE_SRCVERSION_ALL is not set
-CONFIG_STOP_MACHINE=y
-CONFIG_BLOCK=y
-CONFIG_BLK_DEV_BSG=y
-# CONFIG_BLK_DEV_INTEGRITY is not set
-CONFIG_BLOCK_COMPAT=y
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
-# CONFIG_DEFAULT_AS is not set
-# CONFIG_DEFAULT_DEADLINE is not set
-CONFIG_DEFAULT_CFQ=y
-# CONFIG_DEFAULT_NOOP is not set
-CONFIG_DEFAULT_IOSCHED="cfq"
-CONFIG_FREEZER=y
-
-#
-# Processor type and features
-#
-CONFIG_TICK_ONESHOT=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
-CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
 CONFIG_SMP=y
 CONFIG_SPARSE_IRQ=y
-CONFIG_X86_MPPARSE=y
-CONFIG_X86_EXTENDED_PLATFORM=y
-# CONFIG_X86_VSMP is not set
-# CONFIG_X86_UV is not set
-CONFIG_SCHED_OMIT_FRAME_POINTER=y
-# CONFIG_PARAVIRT_GUEST is not set
-# CONFIG_MEMTEST is not set
-# CONFIG_M386 is not set
-# CONFIG_M486 is not set
-# CONFIG_M586 is not set
-# CONFIG_M586TSC is not set
-# CONFIG_M586MMX is not set
-# CONFIG_M686 is not set
-# CONFIG_MPENTIUMII is not set
-# CONFIG_MPENTIUMIII is not set
-# CONFIG_MPENTIUMM is not set
-# CONFIG_MPENTIUM4 is not set
-# CONFIG_MK6 is not set
-# CONFIG_MK7 is not set
-# CONFIG_MK8 is not set
-# CONFIG_MCRUSOE is not set
-# CONFIG_MEFFICEON is not set
-# CONFIG_MWINCHIPC6 is not set
-# CONFIG_MWINCHIP3D is not set
-# CONFIG_MGEODEGX1 is not set
-# CONFIG_MGEODE_LX is not set
-# CONFIG_MCYRIXIII is not set
-# CONFIG_MVIAC3_2 is not set
-# CONFIG_MVIAC7 is not set
-# CONFIG_MPSC is not set
-# CONFIG_MCORE2 is not set
-CONFIG_GENERIC_CPU=y
-CONFIG_X86_CPU=y
-CONFIG_X86_L1_CACHE_BYTES=64
-CONFIG_X86_INTERNODE_CACHE_BYTES=64
-CONFIG_X86_CMPXCHG=y
-CONFIG_X86_L1_CACHE_SHIFT=6
-CONFIG_X86_WP_WORKS_OK=y
-CONFIG_X86_TSC=y
-CONFIG_X86_CMPXCHG64=y
-CONFIG_X86_CMOV=y
-CONFIG_X86_MINIMUM_CPU_FAMILY=64
-CONFIG_X86_DEBUGCTLMSR=y
-CONFIG_CPU_SUP_INTEL=y
-CONFIG_CPU_SUP_AMD=y
-CONFIG_CPU_SUP_CENTAUR=y
-CONFIG_X86_DS=y
-CONFIG_X86_PTRACE_BTS=y
-CONFIG_HPET_TIMER=y
-CONFIG_HPET_EMULATE_RTC=y
-CONFIG_DMI=y
-CONFIG_GART_IOMMU=y
 CONFIG_CALGARY_IOMMU=y
-CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT=y
 CONFIG_AMD_IOMMU=y
 CONFIG_AMD_IOMMU_STATS=y
-CONFIG_SWIOTLB=y
-CONFIG_IOMMU_HELPER=y
-CONFIG_IOMMU_API=y
-# CONFIG_MAXSMP is not set
 CONFIG_NR_CPUS=64
 CONFIG_SCHED_SMT=y
-CONFIG_SCHED_MC=y
-# CONFIG_PREEMPT_NONE is not set
 CONFIG_PREEMPT_VOLUNTARY=y
-# CONFIG_PREEMPT is not set
-CONFIG_X86_LOCAL_APIC=y
-CONFIG_X86_IO_APIC=y
 CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y
 CONFIG_X86_MCE=y
-CONFIG_X86_MCE_INTEL=y
-CONFIG_X86_MCE_AMD=y
-CONFIG_X86_MCE_THRESHOLD=y
-# CONFIG_I8K is not set
 CONFIG_MICROCODE=y
-CONFIG_MICROCODE_INTEL=y
 CONFIG_MICROCODE_AMD=y
-CONFIG_MICROCODE_OLD_INTERFACE=y
 CONFIG_X86_MSR=y
 CONFIG_X86_CPUID=y
-# CONFIG_X86_CPU_DEBUG is not set
-CONFIG_ARCH_PHYS_ADDR_T_64BIT=y
-CONFIG_DIRECT_GBPAGES=y
 CONFIG_NUMA=y
-CONFIG_K8_NUMA=y
-CONFIG_X86_64_ACPI_NUMA=y
-CONFIG_NODES_SPAN_OTHER_NODES=y
-# CONFIG_NUMA_EMU is not set
-CONFIG_NODES_SHIFT=6
-CONFIG_ARCH_SPARSEMEM_DEFAULT=y
-CONFIG_ARCH_SPARSEMEM_ENABLE=y
-CONFIG_ARCH_SELECT_MEMORY_MODEL=y
-CONFIG_SELECT_MEMORY_MODEL=y
-# CONFIG_FLATMEM_MANUAL is not set
-# CONFIG_DISCONTIGMEM_MANUAL is not set
-CONFIG_SPARSEMEM_MANUAL=y
-CONFIG_SPARSEMEM=y
-CONFIG_NEED_MULTIPLE_NODES=y
-CONFIG_HAVE_MEMORY_PRESENT=y
-CONFIG_SPARSEMEM_EXTREME=y
-CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
-CONFIG_SPARSEMEM_VMEMMAP=y
-
-#
-# Memory hotplug is currently incompatible with Software Suspend
-#
-CONFIG_PAGEFLAGS_EXTENDED=y
-CONFIG_SPLIT_PTLOCK_CPUS=4
-CONFIG_MIGRATION=y
-CONFIG_PHYS_ADDR_T_64BIT=y
-CONFIG_ZONE_DMA_FLAG=1
-CONFIG_BOUNCE=y
-CONFIG_VIRT_TO_BUS=y
-CONFIG_UNEVICTABLE_LRU=y
-CONFIG_HAVE_MLOCK=y
-CONFIG_HAVE_MLOCKED_PAGE_BIT=y
 CONFIG_X86_CHECK_BIOS_CORRUPTION=y
-CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y
-CONFIG_X86_RESERVE_LOW_64K=y
-CONFIG_MTRR=y
 # CONFIG_MTRR_SANITIZER is not set
-CONFIG_X86_PAT=y
 CONFIG_EFI=y
-CONFIG_SECCOMP=y
-# CONFIG_CC_STACKPROTECTOR is not set
-# CONFIG_HZ_100 is not set
-# CONFIG_HZ_250 is not set
-# CONFIG_HZ_300 is not set
 CONFIG_HZ_1000=y
-CONFIG_HZ=1000
-CONFIG_SCHED_HRTICK=y
 CONFIG_KEXEC=y
 CONFIG_CRASH_DUMP=y
-# CONFIG_KEXEC_JUMP is not set
-CONFIG_PHYSICAL_START=0x1000000
-CONFIG_RELOCATABLE=y
-CONFIG_PHYSICAL_ALIGN=0x1000000
-CONFIG_HOTPLUG_CPU=y
 # CONFIG_COMPAT_VDSO is not set
-# CONFIG_CMDLINE_BOOL is not set
-CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
-CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y
-
-#
-# Power management and ACPI options
-#
-CONFIG_ARCH_HIBERNATION_HEADER=y
 CONFIG_PM=y
 CONFIG_PM_DEBUG=y
-# CONFIG_PM_VERBOSE is not set
-CONFIG_CAN_PM_TRACE=y
-CONFIG_PM_TRACE=y
 CONFIG_PM_TRACE_RTC=y
-CONFIG_PM_SLEEP_SMP=y
-CONFIG_PM_SLEEP=y
-CONFIG_SUSPEND=y
-# CONFIG_PM_TEST_SUSPEND is not set
-CONFIG_SUSPEND_FREEZER=y
 CONFIG_HIBERNATION=y
-CONFIG_PM_STD_PARTITION=""
-CONFIG_ACPI=y
-CONFIG_ACPI_SLEEP=y
 CONFIG_ACPI_PROCFS=y
-CONFIG_ACPI_PROCFS_POWER=y
-CONFIG_ACPI_SYSFS_POWER=y
-CONFIG_ACPI_PROC_EVENT=y
-CONFIG_ACPI_AC=y
-CONFIG_ACPI_BATTERY=y
-CONFIG_ACPI_BUTTON=y
-CONFIG_ACPI_FAN=y
 CONFIG_ACPI_DOCK=y
-CONFIG_ACPI_PROCESSOR=y
-CONFIG_ACPI_HOTPLUG_CPU=y
-CONFIG_ACPI_THERMAL=y
-CONFIG_ACPI_NUMA=y
-# CONFIG_ACPI_CUSTOM_DSDT is not set
-CONFIG_ACPI_BLACKLIST_YEAR=0
-# CONFIG_ACPI_DEBUG is not set
-# CONFIG_ACPI_PCI_SLOT is not set
-CONFIG_X86_PM_TIMER=y
-CONFIG_ACPI_CONTAINER=y
-# CONFIG_ACPI_SBS is not set
-
-#
-# CPU Frequency scaling
-#
 CONFIG_CPU_FREQ=y
-CONFIG_CPU_FREQ_TABLE=y
 CONFIG_CPU_FREQ_DEBUG=y
 # CONFIG_CPU_FREQ_STAT is not set
-# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set
-# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set
 CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
-# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set
-# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set
 CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
-# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
-CONFIG_CPU_FREQ_GOV_USERSPACE=y
 CONFIG_CPU_FREQ_GOV_ONDEMAND=y
-# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set
-
-#
-# CPUFreq processor drivers
-#
 CONFIG_X86_ACPI_CPUFREQ=y
-# CONFIG_X86_POWERNOW_K8 is not set
-# CONFIG_X86_SPEEDSTEP_CENTRINO is not set
-# CONFIG_X86_P4_CLOCKMOD is not set
-
-#
-# shared options
-#
-# CONFIG_X86_SPEEDSTEP_LIB is not set
-CONFIG_CPU_IDLE=y
-CONFIG_CPU_IDLE_GOV_LADDER=y
-CONFIG_CPU_IDLE_GOV_MENU=y
-
-#
-# Memory power savings
-#
-# CONFIG_I7300_IDLE is not set
-
-#
-# Bus options (PCI etc.)
-#
-CONFIG_PCI=y
-CONFIG_PCI_DIRECT=y
 CONFIG_PCI_MMCONFIG=y
-CONFIG_PCI_DOMAINS=y
 CONFIG_DMAR=y
 # CONFIG_DMAR_DEFAULT_ON is not set
-CONFIG_DMAR_GFX_WA=y
-CONFIG_DMAR_FLOPPY_WA=y
-# CONFIG_INTR_REMAP is not set
 CONFIG_PCIEPORTBUS=y
-# CONFIG_HOTPLUG_PCI_PCIE is not set
-CONFIG_PCIEAER=y
-# CONFIG_PCIEASPM is not set
-CONFIG_ARCH_SUPPORTS_MSI=y
-CONFIG_PCI_MSI=y
-# CONFIG_PCI_LEGACY is not set
-# CONFIG_PCI_DEBUG is not set
-# CONFIG_PCI_STUB is not set
-CONFIG_HT_IRQ=y
-# CONFIG_PCI_IOV is not set
-CONFIG_ISA_DMA_API=y
-CONFIG_K8_NB=y
 CONFIG_PCCARD=y
-# CONFIG_PCMCIA_DEBUG is not set
-CONFIG_PCMCIA=y
-CONFIG_PCMCIA_LOAD_CIS=y
-CONFIG_PCMCIA_IOCTL=y
-CONFIG_CARDBUS=y
-
-#
-# PC-card bridges
-#
 CONFIG_YENTA=y
-CONFIG_YENTA_O2=y
-CONFIG_YENTA_RICOH=y
-CONFIG_YENTA_TI=y
-CONFIG_YENTA_ENE_TUNE=y
-CONFIG_YENTA_TOSHIBA=y
-# CONFIG_PD6729 is not set
-# CONFIG_I82092 is not set
-CONFIG_PCCARD_NONSTATIC=y
 CONFIG_HOTPLUG_PCI=y
-# CONFIG_HOTPLUG_PCI_FAKE is not set
-# CONFIG_HOTPLUG_PCI_ACPI is not set
-# CONFIG_HOTPLUG_PCI_CPCI is not set
-# CONFIG_HOTPLUG_PCI_SHPC is not set
-
-#
-# Executable file formats / Emulations
-#
-CONFIG_BINFMT_ELF=y
-CONFIG_COMPAT_BINFMT_ELF=y
 CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
-# CONFIG_HAVE_AOUT is not set
 CONFIG_BINFMT_MISC=y
 CONFIG_IA32_EMULATION=y
-# CONFIG_IA32_AOUT is not set
-CONFIG_COMPAT=y
-CONFIG_COMPAT_FOR_U64_ALIGNMENT=y
-CONFIG_SYSVIPC_COMPAT=y
 CONFIG_NET=y
-
-#
-# Networking options
-#
 CONFIG_PACKET=y
-CONFIG_PACKET_MMAP=y
 CONFIG_UNIX=y
-CONFIG_XFRM=y
 CONFIG_XFRM_USER=y
-# CONFIG_XFRM_SUB_POLICY is not set
-# CONFIG_XFRM_MIGRATE is not set
-# CONFIG_XFRM_STATISTICS is not set
-# CONFIG_NET_KEY is not set
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_ADVANCED_ROUTER=y
-CONFIG_ASK_IP_FIB_HASH=y
-# CONFIG_IP_FIB_TRIE is not set
-CONFIG_IP_FIB_HASH=y
 CONFIG_IP_MULTIPLE_TABLES=y
 CONFIG_IP_ROUTE_MULTIPATH=y
 CONFIG_IP_ROUTE_VERBOSE=y
@@ -521,118 +91,46 @@ CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
 CONFIG_IP_PNP_RARP=y
-# CONFIG_NET_IPIP is not set
-# CONFIG_NET_IPGRE is not set
 CONFIG_IP_MROUTE=y
 CONFIG_IP_PIMSM_V1=y
 CONFIG_IP_PIMSM_V2=y
-# CONFIG_ARPD is not set
 CONFIG_SYN_COOKIES=y
-# CONFIG_INET_AH is not set
-# CONFIG_INET_ESP is not set
-# CONFIG_INET_IPCOMP is not set
-# CONFIG_INET_XFRM_TUNNEL is not set
-CONFIG_INET_TUNNEL=y
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-CONFIG_INET_LRO=y
 # CONFIG_INET_DIAG is not set
 CONFIG_TCP_CONG_ADVANCED=y
 # CONFIG_TCP_CONG_BIC is not set
-CONFIG_TCP_CONG_CUBIC=y
 # CONFIG_TCP_CONG_WESTWOOD is not set
 # CONFIG_TCP_CONG_HTCP is not set
-# CONFIG_TCP_CONG_HSTCP is not set
-# CONFIG_TCP_CONG_HYBLA is not set
-# CONFIG_TCP_CONG_VEGAS is not set
-# CONFIG_TCP_CONG_SCALABLE is not set
-# CONFIG_TCP_CONG_LP is not set
-# CONFIG_TCP_CONG_VENO is not set
-# CONFIG_TCP_CONG_YEAH is not set
-# CONFIG_TCP_CONG_ILLINOIS is not set
-# CONFIG_DEFAULT_BIC is not set
-CONFIG_DEFAULT_CUBIC=y
-# CONFIG_DEFAULT_HTCP is not set
-# CONFIG_DEFAULT_VEGAS is not set
-# CONFIG_DEFAULT_WESTWOOD is not set
-# CONFIG_DEFAULT_RENO is not set
-CONFIG_DEFAULT_TCP_CONG="cubic"
 CONFIG_TCP_MD5SIG=y
 CONFIG_IPV6=y
-# CONFIG_IPV6_PRIVACY is not set
-# CONFIG_IPV6_ROUTER_PREF is not set
-# CONFIG_IPV6_OPTIMISTIC_DAD is not set
 CONFIG_INET6_AH=y
 CONFIG_INET6_ESP=y
-# CONFIG_INET6_IPCOMP is not set
-# CONFIG_IPV6_MIP6 is not set
-# CONFIG_INET6_XFRM_TUNNEL is not set
-# CONFIG_INET6_TUNNEL is not set
-CONFIG_INET6_XFRM_MODE_TRANSPORT=y
-CONFIG_INET6_XFRM_MODE_TUNNEL=y
-CONFIG_INET6_XFRM_MODE_BEET=y
-# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set
-CONFIG_IPV6_SIT=y
-CONFIG_IPV6_NDISC_NODETYPE=y
-# CONFIG_IPV6_TUNNEL is not set
-# CONFIG_IPV6_MULTIPLE_TABLES is not set
-# CONFIG_IPV6_MROUTE is not set
 CONFIG_NETLABEL=y
-CONFIG_NETWORK_SECMARK=y
 CONFIG_NETFILTER=y
-# CONFIG_NETFILTER_DEBUG is not set
 # CONFIG_NETFILTER_ADVANCED is not set
-
-#
-# Core Netfilter Configuration
-#
-CONFIG_NETFILTER_NETLINK=y
-CONFIG_NETFILTER_NETLINK_LOG=y
 CONFIG_NF_CONNTRACK=y
-CONFIG_NF_CONNTRACK_SECMARK=y
 CONFIG_NF_CONNTRACK_FTP=y
 CONFIG_NF_CONNTRACK_IRC=y
 CONFIG_NF_CONNTRACK_SIP=y
 CONFIG_NF_CT_NETLINK=y
-CONFIG_NETFILTER_XTABLES=y
 CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
-CONFIG_NETFILTER_XT_TARGET_MARK=y
 CONFIG_NETFILTER_XT_TARGET_NFLOG=y
 CONFIG_NETFILTER_XT_TARGET_SECMARK=y
 CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
 CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
-CONFIG_NETFILTER_XT_MATCH_MARK=y
 CONFIG_NETFILTER_XT_MATCH_POLICY=y
 CONFIG_NETFILTER_XT_MATCH_STATE=y
-# CONFIG_IP_VS is not set
-
-#
-# IP: Netfilter Configuration
-#
-CONFIG_NF_DEFRAG_IPV4=y
 CONFIG_NF_CONNTRACK_IPV4=y
-CONFIG_NF_CONNTRACK_PROC_COMPAT=y
 CONFIG_IP_NF_IPTABLES=y
 CONFIG_IP_NF_FILTER=y
 CONFIG_IP_NF_TARGET_REJECT=y
 CONFIG_IP_NF_TARGET_LOG=y
 CONFIG_IP_NF_TARGET_ULOG=y
 CONFIG_NF_NAT=y
-CONFIG_NF_NAT_NEEDED=y
 CONFIG_IP_NF_TARGET_MASQUERADE=y
-CONFIG_NF_NAT_FTP=y
-CONFIG_NF_NAT_IRC=y
-# CONFIG_NF_NAT_TFTP is not set
-# CONFIG_NF_NAT_AMANDA is not set
-# CONFIG_NF_NAT_PPTP is not set
-# CONFIG_NF_NAT_H323 is not set
-CONFIG_NF_NAT_SIP=y
 CONFIG_IP_NF_MANGLE=y
-
-#
-# IPv6: Netfilter Configuration
-#
 CONFIG_NF_CONNTRACK_IPV6=y
 CONFIG_IP6_NF_IPTABLES=y
 CONFIG_IP6_NF_MATCH_IPV6HEADER=y
@@ -640,1208 +138,111 @@ CONFIG_IP6_NF_TARGET_LOG=y
 CONFIG_IP6_NF_FILTER=y
 CONFIG_IP6_NF_TARGET_REJECT=y
 CONFIG_IP6_NF_MANGLE=y
-# CONFIG_IP_DCCP is not set
-# CONFIG_IP_SCTP is not set
-# CONFIG_TIPC is not set
-# CONFIG_ATM is not set
-# CONFIG_BRIDGE is not set
-# CONFIG_NET_DSA is not set
-# CONFIG_VLAN_8021Q is not set
-# CONFIG_DECNET is not set
-CONFIG_LLC=y
-# CONFIG_LLC2 is not set
-# CONFIG_IPX is not set
-# CONFIG_ATALK is not set
-# CONFIG_X25 is not set
-# CONFIG_LAPB is not set
-# CONFIG_ECONET is not set
-# CONFIG_WAN_ROUTER is not set
-# CONFIG_PHONET is not set
 CONFIG_NET_SCHED=y
-
-#
-# Queueing/Scheduling
-#
-# CONFIG_NET_SCH_CBQ is not set
-# CONFIG_NET_SCH_HTB is not set
-# CONFIG_NET_SCH_HFSC is not set
-# CONFIG_NET_SCH_PRIO is not set
-# CONFIG_NET_SCH_MULTIQ is not set
-# CONFIG_NET_SCH_RED is not set
-# CONFIG_NET_SCH_SFQ is not set
-# CONFIG_NET_SCH_TEQL is not set
-# CONFIG_NET_SCH_TBF is not set
-# CONFIG_NET_SCH_GRED is not set
-# CONFIG_NET_SCH_DSMARK is not set
-# CONFIG_NET_SCH_NETEM is not set
-# CONFIG_NET_SCH_DRR is not set
-# CONFIG_NET_SCH_INGRESS is not set
-
-#
-# Classification
-#
-CONFIG_NET_CLS=y
-# CONFIG_NET_CLS_BASIC is not set
-# CONFIG_NET_CLS_TCINDEX is not set
-# CONFIG_NET_CLS_ROUTE4 is not set
-# CONFIG_NET_CLS_FW is not set
-# CONFIG_NET_CLS_U32 is not set
-# CONFIG_NET_CLS_RSVP is not set
-# CONFIG_NET_CLS_RSVP6 is not set
-# CONFIG_NET_CLS_FLOW is not set
-# CONFIG_NET_CLS_CGROUP is not set
 CONFIG_NET_EMATCH=y
-CONFIG_NET_EMATCH_STACK=32
-# CONFIG_NET_EMATCH_CMP is not set
-# CONFIG_NET_EMATCH_NBYTE is not set
-# CONFIG_NET_EMATCH_U32 is not set
-# CONFIG_NET_EMATCH_META is not set
-# CONFIG_NET_EMATCH_TEXT is not set
 CONFIG_NET_CLS_ACT=y
-# CONFIG_NET_ACT_POLICE is not set
-# CONFIG_NET_ACT_GACT is not set
-# CONFIG_NET_ACT_MIRRED is not set
-# CONFIG_NET_ACT_IPT is not set
-# CONFIG_NET_ACT_NAT is not set
-# CONFIG_NET_ACT_PEDIT is not set
-# CONFIG_NET_ACT_SIMP is not set
-# CONFIG_NET_ACT_SKBEDIT is not set
-CONFIG_NET_SCH_FIFO=y
-# CONFIG_DCB is not set
-
-#
-# Network testing
-#
-# CONFIG_NET_PKTGEN is not set
-# CONFIG_NET_TCPPROBE is not set
-# CONFIG_NET_DROP_MONITOR is not set
 CONFIG_HAMRADIO=y
-
-#
-# Packet Radio protocols
-#
-# CONFIG_AX25 is not set
-# CONFIG_CAN is not set
-# CONFIG_IRDA is not set
-# CONFIG_BT is not set
-# CONFIG_AF_RXRPC is not set
-CONFIG_FIB_RULES=y
-CONFIG_WIRELESS=y
 CONFIG_CFG80211=y
-# CONFIG_CFG80211_REG_DEBUG is not set
-CONFIG_WIRELESS_OLD_REGULATORY=y
-CONFIG_WIRELESS_EXT=y
-CONFIG_WIRELESS_EXT_SYSFS=y
-# CONFIG_LIB80211 is not set
 CONFIG_MAC80211=y
-
-#
-# Rate control algorithm selection
-#
-CONFIG_MAC80211_RC_MINSTREL=y
-# CONFIG_MAC80211_RC_DEFAULT_PID is not set
-CONFIG_MAC80211_RC_DEFAULT_MINSTREL=y
-CONFIG_MAC80211_RC_DEFAULT="minstrel"
-# CONFIG_MAC80211_MESH is not set
 CONFIG_MAC80211_LEDS=y
-# CONFIG_MAC80211_DEBUGFS is not set
-# CONFIG_MAC80211_DEBUG_MENU is not set
-# CONFIG_WIMAX is not set
 CONFIG_RFKILL=y
-# CONFIG_RFKILL_INPUT is not set
-CONFIG_RFKILL_LEDS=y
-# CONFIG_NET_9P is not set
-
-#
-# Device Drivers
-#
-
-#
-# Generic Driver Options
-#
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_STANDALONE=y
-CONFIG_PREVENT_FIRMWARE_BUILD=y
-CONFIG_FW_LOADER=y
-CONFIG_FIRMWARE_IN_KERNEL=y
-CONFIG_EXTRA_FIRMWARE=""
-# CONFIG_DEBUG_DRIVER is not set
 CONFIG_DEBUG_DEVRES=y
-# CONFIG_SYS_HYPERVISOR is not set
 CONFIG_CONNECTOR=y
-CONFIG_PROC_EVENTS=y
-# CONFIG_MTD is not set
-# CONFIG_PARPORT is not set
-CONFIG_PNP=y
-CONFIG_PNP_DEBUG_MESSAGES=y
-
-#
-# Protocols
-#
-CONFIG_PNPACPI=y
-CONFIG_BLK_DEV=y
-# CONFIG_BLK_DEV_FD is not set
-# CONFIG_BLK_CPQ_DA is not set
-# CONFIG_BLK_CPQ_CISS_DA is not set
-# CONFIG_BLK_DEV_DAC960 is not set
-# CONFIG_BLK_DEV_UMEM is not set
-# CONFIG_BLK_DEV_COW_COMMON is not set
 CONFIG_BLK_DEV_LOOP=y
-# CONFIG_BLK_DEV_CRYPTOLOOP is not set
-# CONFIG_BLK_DEV_NBD is not set
-# CONFIG_BLK_DEV_SX8 is not set
-# CONFIG_BLK_DEV_UB is not set
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_COUNT=16
 CONFIG_BLK_DEV_RAM_SIZE=16384
-# CONFIG_BLK_DEV_XIP is not set
-# CONFIG_CDROM_PKTCDVD is not set
-# CONFIG_ATA_OVER_ETH is not set
-# CONFIG_BLK_DEV_HD is not set
-CONFIG_MISC_DEVICES=y
-# CONFIG_IBM_ASM is not set
-# CONFIG_PHANTOM is not set
-# CONFIG_SGI_IOC4 is not set
-# CONFIG_TIFM_CORE is not set
-# CONFIG_ICS932S401 is not set
-# CONFIG_ENCLOSURE_SERVICES is not set
-# CONFIG_HP_ILO is not set
-# CONFIG_ISL29003 is not set
-# CONFIG_C2PORT is not set
-
-#
-# EEPROM support
-#
-# CONFIG_EEPROM_AT24 is not set
-# CONFIG_EEPROM_LEGACY is not set
-# CONFIG_EEPROM_93CX6 is not set
-CONFIG_HAVE_IDE=y
-# CONFIG_IDE is not set
-
-#
-# SCSI device support
-#
-# CONFIG_RAID_ATTRS is not set
-CONFIG_SCSI=y
-CONFIG_SCSI_DMA=y
-# CONFIG_SCSI_TGT is not set
-# CONFIG_SCSI_NETLINK is not set
-CONFIG_SCSI_PROC_FS=y
-
-#
-# SCSI support type (disk, tape, CD-ROM)
-#
 CONFIG_BLK_DEV_SD=y
-# CONFIG_CHR_DEV_ST is not set
-# CONFIG_CHR_DEV_OSST is not set
 CONFIG_BLK_DEV_SR=y
 CONFIG_BLK_DEV_SR_VENDOR=y
 CONFIG_CHR_DEV_SG=y
-# CONFIG_CHR_DEV_SCH is not set
-
-#
-# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
-#
-# CONFIG_SCSI_MULTI_LUN is not set
 CONFIG_SCSI_CONSTANTS=y
-# CONFIG_SCSI_LOGGING is not set
-# CONFIG_SCSI_SCAN_ASYNC is not set
-CONFIG_SCSI_WAIT_SCAN=m
-
-#
-# SCSI Transports
-#
 CONFIG_SCSI_SPI_ATTRS=y
-# CONFIG_SCSI_FC_ATTRS is not set
-# CONFIG_SCSI_ISCSI_ATTRS is not set
-# CONFIG_SCSI_SAS_ATTRS is not set
-# CONFIG_SCSI_SAS_LIBSAS is not set
-# CONFIG_SCSI_SRP_ATTRS is not set
 # CONFIG_SCSI_LOWLEVEL is not set
-# CONFIG_SCSI_LOWLEVEL_PCMCIA is not set
-# CONFIG_SCSI_DH is not set
-# CONFIG_SCSI_OSD_INITIATOR is not set
 CONFIG_ATA=y
-# CONFIG_ATA_NONSTANDARD is not set
-CONFIG_ATA_ACPI=y
-CONFIG_SATA_PMP=y
 CONFIG_SATA_AHCI=y
-# CONFIG_SATA_SIL24 is not set
-CONFIG_ATA_SFF=y
-# CONFIG_SATA_SVW is not set
 CONFIG_ATA_PIIX=y
-# CONFIG_SATA_MV is not set
-# CONFIG_SATA_NV is not set
-# CONFIG_PDC_ADMA is not set
-# CONFIG_SATA_QSTOR is not set
-# CONFIG_SATA_PROMISE is not set
-# CONFIG_SATA_SX4 is not set
-# CONFIG_SATA_SIL is not set
-# CONFIG_SATA_SIS is not set
-# CONFIG_SATA_ULI is not set
-# CONFIG_SATA_VIA is not set
-# CONFIG_SATA_VITESSE is not set
-# CONFIG_SATA_INIC162X is not set
-# CONFIG_PATA_ACPI is not set
-# CONFIG_PATA_ALI is not set
 CONFIG_PATA_AMD=y
-# CONFIG_PATA_ARTOP is not set
-# CONFIG_PATA_ATIIXP is not set
-# CONFIG_PATA_CMD640_PCI is not set
-# CONFIG_PATA_CMD64X is not set
-# CONFIG_PATA_CS5520 is not set
-# CONFIG_PATA_CS5530 is not set
-# CONFIG_PATA_CYPRESS is not set
-# CONFIG_PATA_EFAR is not set
-# CONFIG_ATA_GENERIC is not set
-# CONFIG_PATA_HPT366 is not set
-# CONFIG_PATA_HPT37X is not set
-# CONFIG_PATA_HPT3X2N is not set
-# CONFIG_PATA_HPT3X3 is not set
-# CONFIG_PATA_IT821X is not set
-# CONFIG_PATA_IT8213 is not set
-# CONFIG_PATA_JMICRON is not set
-# CONFIG_PATA_TRIFLEX is not set
-# CONFIG_PATA_MARVELL is not set
-# CONFIG_PATA_MPIIX is not set
 CONFIG_PATA_OLDPIIX=y
-# CONFIG_PATA_NETCELL is not set
-# CONFIG_PATA_NINJA32 is not set
-# CONFIG_PATA_NS87410 is not set
-# CONFIG_PATA_NS87415 is not set
-# CONFIG_PATA_OPTI is not set
-# CONFIG_PATA_OPTIDMA is not set
-# CONFIG_PATA_PCMCIA is not set
-# CONFIG_PATA_PDC_OLD is not set
-# CONFIG_PATA_RADISYS is not set
-# CONFIG_PATA_RZ1000 is not set
-# CONFIG_PATA_SC1200 is not set
-# CONFIG_PATA_SERVERWORKS is not set
-# CONFIG_PATA_PDC2027X is not set
-# CONFIG_PATA_SIL680 is not set
-# CONFIG_PATA_SIS is not set
-# CONFIG_PATA_VIA is not set
-# CONFIG_PATA_WINBOND is not set
 CONFIG_PATA_SCH=y
 CONFIG_MD=y
 CONFIG_BLK_DEV_MD=y
-CONFIG_MD_AUTODETECT=y
-# CONFIG_MD_LINEAR is not set
-# CONFIG_MD_RAID0 is not set
-# CONFIG_MD_RAID1 is not set
-# CONFIG_MD_RAID10 is not set
-# CONFIG_MD_RAID456 is not set
-# CONFIG_MD_MULTIPATH is not set
-# CONFIG_MD_FAULTY is not set
 CONFIG_BLK_DEV_DM=y
-# CONFIG_DM_DEBUG is not set
-# CONFIG_DM_CRYPT is not set
-# CONFIG_DM_SNAPSHOT is not set
 CONFIG_DM_MIRROR=y
 CONFIG_DM_ZERO=y
-# CONFIG_DM_MULTIPATH is not set
-# CONFIG_DM_DELAY is not set
-# CONFIG_DM_UEVENT is not set
-# CONFIG_FUSION is not set
-
-#
-# IEEE 1394 (FireWire) support
-#
-
-#
-# Enable only one of the two stacks, unless you know what you are doing
-#
-# CONFIG_FIREWIRE is not set
-# CONFIG_IEEE1394 is not set
-# CONFIG_I2O is not set
 CONFIG_MACINTOSH_DRIVERS=y
 CONFIG_MAC_EMUMOUSEBTN=y
 CONFIG_NETDEVICES=y
-CONFIG_COMPAT_NET_DEV_OPS=y
-# CONFIG_IFB is not set
-# CONFIG_DUMMY is not set
-# CONFIG_BONDING is not set
-# CONFIG_MACVLAN is not set
-# CONFIG_EQUALIZER is not set
-# CONFIG_TUN is not set
-# CONFIG_VETH is not set
-# CONFIG_NET_SB1000 is not set
-# CONFIG_ARCNET is not set
-CONFIG_PHYLIB=y
-
-#
-# MII PHY device drivers
-#
-# CONFIG_MARVELL_PHY is not set
-# CONFIG_DAVICOM_PHY is not set
-# CONFIG_QSEMI_PHY is not set
-# CONFIG_LXT_PHY is not set
-# CONFIG_CICADA_PHY is not set
-# CONFIG_VITESSE_PHY is not set
-# CONFIG_SMSC_PHY is not set
-# CONFIG_BROADCOM_PHY is not set
-# CONFIG_ICPLUS_PHY is not set
-# CONFIG_REALTEK_PHY is not set
-# CONFIG_NATIONAL_PHY is not set
-# CONFIG_STE10XP is not set
-# CONFIG_LSI_ET1011C_PHY is not set
-# CONFIG_FIXED_PHY is not set
-# CONFIG_MDIO_BITBANG is not set
 CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
-# CONFIG_HAPPYMEAL is not set
-# CONFIG_SUNGEM is not set
-# CONFIG_CASSINI is not set
 CONFIG_NET_VENDOR_3COM=y
-# CONFIG_VORTEX is not set
-# CONFIG_TYPHOON is not set
-# CONFIG_ETHOC is not set
-# CONFIG_DNET is not set
 CONFIG_NET_TULIP=y
-# CONFIG_DE2104X is not set
-# CONFIG_TULIP is not set
-# CONFIG_DE4X5 is not set
-# CONFIG_WINBOND_840 is not set
-# CONFIG_DM9102 is not set
-# CONFIG_ULI526X is not set
-# CONFIG_PCMCIA_XIRCOM is not set
-# CONFIG_HP100 is not set
-# CONFIG_IBM_NEW_EMAC_ZMII is not set
-# CONFIG_IBM_NEW_EMAC_RGMII is not set
-# CONFIG_IBM_NEW_EMAC_TAH is not set
-# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
-# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set
-# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
-# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
 CONFIG_NET_PCI=y
-# CONFIG_PCNET32 is not set
-# CONFIG_AMD8111_ETH is not set
-# CONFIG_ADAPTEC_STARFIRE is not set
-# CONFIG_B44 is not set
 CONFIG_FORCEDETH=y
-# CONFIG_FORCEDETH_NAPI is not set
 CONFIG_E100=y
-# CONFIG_FEALNX is not set
-# CONFIG_NATSEMI is not set
-# CONFIG_NE2K_PCI is not set
-# CONFIG_8139CP is not set
 CONFIG_8139TOO=y
-CONFIG_8139TOO_PIO=y
-# CONFIG_8139TOO_TUNE_TWISTER is not set
-# CONFIG_8139TOO_8129 is not set
-# CONFIG_8139_OLD_RX_RESET is not set
-# CONFIG_R6040 is not set
-# CONFIG_SIS900 is not set
-# CONFIG_EPIC100 is not set
-# CONFIG_SMSC9420 is not set
-# CONFIG_SUNDANCE is not set
-# CONFIG_TLAN is not set
-# CONFIG_VIA_RHINE is not set
-# CONFIG_SC92031 is not set
-# CONFIG_ATL2 is not set
-CONFIG_NETDEV_1000=y
-# CONFIG_ACENIC is not set
-# CONFIG_DL2K is not set
 CONFIG_E1000=y
-# CONFIG_E1000E is not set
-# CONFIG_IP1000 is not set
-# CONFIG_IGB is not set
-# CONFIG_IGBVF is not set
-# CONFIG_NS83820 is not set
-# CONFIG_HAMACHI is not set
-# CONFIG_YELLOWFIN is not set
-# CONFIG_R8169 is not set
-# CONFIG_SIS190 is not set
-# CONFIG_SKGE is not set
 CONFIG_SKY2=y
-# CONFIG_SKY2_DEBUG is not set
-# CONFIG_VIA_VELOCITY is not set
 CONFIG_TIGON3=y
-# CONFIG_BNX2 is not set
-# CONFIG_QLA3XXX is not set
-# CONFIG_ATL1 is not set
-# CONFIG_ATL1E is not set
-# CONFIG_ATL1C is not set
-# CONFIG_JME is not set
-CONFIG_NETDEV_10000=y
-# CONFIG_CHELSIO_T1 is not set
-CONFIG_CHELSIO_T3_DEPENDS=y
-# CONFIG_CHELSIO_T3 is not set
-# CONFIG_ENIC is not set
-# CONFIG_IXGBE is not set
-# CONFIG_IXGB is not set
-# CONFIG_S2IO is not set
-# CONFIG_VXGE is not set
-# CONFIG_MYRI10GE is not set
-# CONFIG_NETXEN_NIC is not set
-# CONFIG_NIU is not set
-# CONFIG_MLX4_EN is not set
-# CONFIG_MLX4_CORE is not set
-# CONFIG_TEHUTI is not set
-# CONFIG_BNX2X is not set
-# CONFIG_QLGE is not set
-# CONFIG_SFC is not set
-# CONFIG_BE2NET is not set
 CONFIG_TR=y
-# CONFIG_IBMOL is not set
-# CONFIG_3C359 is not set
-# CONFIG_TMS380TR is not set
-
-#
-# Wireless LAN
-#
-# CONFIG_WLAN_PRE80211 is not set
-CONFIG_WLAN_80211=y
-# CONFIG_PCMCIA_RAYCS is not set
-# CONFIG_LIBERTAS is not set
-# CONFIG_LIBERTAS_THINFIRM is not set
-# CONFIG_AIRO is not set
-# CONFIG_ATMEL is not set
-# CONFIG_AT76C50X_USB is not set
-# CONFIG_AIRO_CS is not set
-# CONFIG_PCMCIA_WL3501 is not set
-# CONFIG_PRISM54 is not set
-# CONFIG_USB_ZD1201 is not set
-# CONFIG_USB_NET_RNDIS_WLAN is not set
-# CONFIG_RTL8180 is not set
-# CONFIG_RTL8187 is not set
-# CONFIG_ADM8211 is not set
-# CONFIG_MAC80211_HWSIM is not set
-# CONFIG_MWL8K is not set
-# CONFIG_P54_COMMON is not set
-CONFIG_ATH5K=y
-# CONFIG_ATH5K_DEBUG is not set
-# CONFIG_ATH9K is not set
-# CONFIG_AR9170_USB is not set
-# CONFIG_IPW2100 is not set
-# CONFIG_IPW2200 is not set
-# CONFIG_IWLWIFI is not set
-# CONFIG_HOSTAP is not set
-# CONFIG_B43 is not set
-# CONFIG_B43LEGACY is not set
-# CONFIG_ZD1211RW is not set
-# CONFIG_RT2X00 is not set
-# CONFIG_HERMES is not set
-
-#
-# Enable WiMAX (Networking options) to see the WiMAX drivers
-#
-
-#
-# USB Network Adapters
-#
-# CONFIG_USB_CATC is not set
-# CONFIG_USB_KAWETH is not set
-# CONFIG_USB_PEGASUS is not set
-# CONFIG_USB_RTL8150 is not set
-# CONFIG_USB_USBNET is not set
-# CONFIG_USB_HSO is not set
 CONFIG_NET_PCMCIA=y
-# CONFIG_PCMCIA_3C589 is not set
-# CONFIG_PCMCIA_3C574 is not set
-# CONFIG_PCMCIA_FMVJ18X is not set
-# CONFIG_PCMCIA_PCNET is not set
-# CONFIG_PCMCIA_NMCLAN is not set
-# CONFIG_PCMCIA_SMC91C92 is not set
-# CONFIG_PCMCIA_XIRC2PS is not set
-# CONFIG_PCMCIA_AXNET is not set
-# CONFIG_PCMCIA_IBMTR is not set
-# CONFIG_WAN is not set
 CONFIG_FDDI=y
-# CONFIG_DEFXX is not set
-# CONFIG_SKFP is not set
-# CONFIG_HIPPI is not set
-# CONFIG_PPP is not set
-# CONFIG_SLIP is not set
-# CONFIG_NET_FC is not set
 CONFIG_NETCONSOLE=y
-# CONFIG_NETCONSOLE_DYNAMIC is not set
-CONFIG_NETPOLL=y
-# CONFIG_NETPOLL_TRAP is not set
-CONFIG_NET_POLL_CONTROLLER=y
-# CONFIG_ISDN is not set
-# CONFIG_PHONE is not set
-
-#
-# Input device support
-#
-CONFIG_INPUT=y
-CONFIG_INPUT_FF_MEMLESS=y
 CONFIG_INPUT_POLLDEV=y
-
-#
-# Userland interfaces
-#
-CONFIG_INPUT_MOUSEDEV=y
 # CONFIG_INPUT_MOUSEDEV_PSAUX is not set
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
-# CONFIG_INPUT_JOYDEV is not set
 CONFIG_INPUT_EVDEV=y
-# CONFIG_INPUT_EVBUG is not set
-
-#
-# Input Device Drivers
-#
-CONFIG_INPUT_KEYBOARD=y
-CONFIG_KEYBOARD_ATKBD=y
-# CONFIG_KEYBOARD_SUNKBD is not set
-# CONFIG_KEYBOARD_LKKBD is not set
-# CONFIG_KEYBOARD_XTKBD is not set
-# CONFIG_KEYBOARD_NEWTON is not set
-# CONFIG_KEYBOARD_STOWAWAY is not set
-CONFIG_INPUT_MOUSE=y
-CONFIG_MOUSE_PS2=y
-CONFIG_MOUSE_PS2_ALPS=y
-CONFIG_MOUSE_PS2_LOGIPS2PP=y
-CONFIG_MOUSE_PS2_SYNAPTICS=y
-CONFIG_MOUSE_PS2_LIFEBOOK=y
-CONFIG_MOUSE_PS2_TRACKPOINT=y
-# CONFIG_MOUSE_PS2_ELANTECH is not set
-# CONFIG_MOUSE_PS2_TOUCHKIT is not set
-# CONFIG_MOUSE_SERIAL is not set
-# CONFIG_MOUSE_APPLETOUCH is not set
-# CONFIG_MOUSE_BCM5974 is not set
-# CONFIG_MOUSE_VSXXXAA is not set
 CONFIG_INPUT_JOYSTICK=y
-# CONFIG_JOYSTICK_ANALOG is not set
-# CONFIG_JOYSTICK_A3D is not set
-# CONFIG_JOYSTICK_ADI is not set
-# CONFIG_JOYSTICK_COBRA is not set
-# CONFIG_JOYSTICK_GF2K is not set
-# CONFIG_JOYSTICK_GRIP is not set
-# CONFIG_JOYSTICK_GRIP_MP is not set
-# CONFIG_JOYSTICK_GUILLEMOT is not set
-# CONFIG_JOYSTICK_INTERACT is not set
-# CONFIG_JOYSTICK_SIDEWINDER is not set
-# CONFIG_JOYSTICK_TMDC is not set
-# CONFIG_JOYSTICK_IFORCE is not set
-# CONFIG_JOYSTICK_WARRIOR is not set
-# CONFIG_JOYSTICK_MAGELLAN is not set
-# CONFIG_JOYSTICK_SPACEORB is not set
-# CONFIG_JOYSTICK_SPACEBALL is not set
-# CONFIG_JOYSTICK_STINGER is not set
-#
CONFIG_JOYSTICK_TWIDJOY is not set -# CONFIG_JOYSTICK_ZHENHUA is not set -# CONFIG_JOYSTICK_JOYDUMP is not set -# CONFIG_JOYSTICK_XPAD is not set  CONFIG_INPUT_TABLET=y -# CONFIG_TABLET_USB_ACECAD is not set -# CONFIG_TABLET_USB_AIPTEK is not set -# CONFIG_TABLET_USB_GTCO is not set -# CONFIG_TABLET_USB_KBTAB is not set -# CONFIG_TABLET_USB_WACOM is not set  CONFIG_INPUT_TOUCHSCREEN=y -# CONFIG_TOUCHSCREEN_AD7879_I2C is not set -# CONFIG_TOUCHSCREEN_AD7879 is not set -# CONFIG_TOUCHSCREEN_FUJITSU is not set -# CONFIG_TOUCHSCREEN_GUNZE is not set -# CONFIG_TOUCHSCREEN_ELO is not set -# CONFIG_TOUCHSCREEN_WACOM_W8001 is not set -# CONFIG_TOUCHSCREEN_MTOUCH is not set -# CONFIG_TOUCHSCREEN_INEXIO is not set -# CONFIG_TOUCHSCREEN_MK712 is not set -# CONFIG_TOUCHSCREEN_PENMOUNT is not set -# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set -# CONFIG_TOUCHSCREEN_TOUCHWIN is not set -# CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set -# CONFIG_TOUCHSCREEN_TOUCHIT213 is not set -# CONFIG_TOUCHSCREEN_TSC2007 is not set  CONFIG_INPUT_MISC=y -# CONFIG_INPUT_PCSPKR is not set -# CONFIG_INPUT_APANEL is not set -# CONFIG_INPUT_ATLAS_BTNS is not set -# CONFIG_INPUT_ATI_REMOTE is not set -# CONFIG_INPUT_ATI_REMOTE2 is not set -# CONFIG_INPUT_KEYSPAN_REMOTE is not set -# CONFIG_INPUT_POWERMATE is not set -# CONFIG_INPUT_YEALINK is not set -# CONFIG_INPUT_CM109 is not set -# CONFIG_INPUT_UINPUT is not set - -# -# Hardware I/O ports -# -CONFIG_SERIO=y -CONFIG_SERIO_I8042=y -CONFIG_SERIO_SERPORT=y -# CONFIG_SERIO_CT82C710 is not set -# CONFIG_SERIO_PCIPS2 is not set -CONFIG_SERIO_LIBPS2=y -# CONFIG_SERIO_RAW is not set -# CONFIG_GAMEPORT is not set - -# -# Character devices -# -CONFIG_VT=y -CONFIG_CONSOLE_TRANSLATIONS=y -CONFIG_VT_CONSOLE=y -CONFIG_HW_CONSOLE=y  CONFIG_VT_HW_CONSOLE_BINDING=y -CONFIG_DEVKMEM=y  CONFIG_SERIAL_NONSTANDARD=y -# CONFIG_COMPUTONE is not set -# CONFIG_ROCKETPORT is not set -# CONFIG_CYCLADES is not set -# CONFIG_DIGIEPCA is not set -# CONFIG_MOXA_INTELLIO is not set -# CONFIG_MOXA_SMARTIO is not set -# CONFIG_ISI is not set -# CONFIG_SYNCLINK is not set -# CONFIG_SYNCLINKMP is not set -# CONFIG_SYNCLINK_GT is not set -# CONFIG_N_HDLC is not set -# CONFIG_RISCOM8 is not set -# CONFIG_SPECIALIX is not set -# CONFIG_SX is not set -# CONFIG_RIO is not set -# CONFIG_STALDRV is not set -# CONFIG_NOZOMI is not set - -# -# Serial drivers -#  CONFIG_SERIAL_8250=y  CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_FIX_EARLYCON_MEM=y -CONFIG_SERIAL_8250_PCI=y -CONFIG_SERIAL_8250_PNP=y -# CONFIG_SERIAL_8250_CS is not set  CONFIG_SERIAL_8250_NR_UARTS=32 -CONFIG_SERIAL_8250_RUNTIME_UARTS=4  CONFIG_SERIAL_8250_EXTENDED=y  CONFIG_SERIAL_8250_MANY_PORTS=y  CONFIG_SERIAL_8250_SHARE_IRQ=y  CONFIG_SERIAL_8250_DETECT_IRQ=y  CONFIG_SERIAL_8250_RSA=y - -# -# Non-8250 serial port support -# -CONFIG_SERIAL_CORE=y -CONFIG_SERIAL_CORE_CONSOLE=y -# CONFIG_SERIAL_JSM is not set -CONFIG_UNIX98_PTYS=y -# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set  # CONFIG_LEGACY_PTYS is not set -# CONFIG_IPMI_HANDLER is not set  CONFIG_HW_RANDOM=y -# CONFIG_HW_RANDOM_TIMERIOMEM is not set  # CONFIG_HW_RANDOM_INTEL is not set  # CONFIG_HW_RANDOM_AMD is not set  CONFIG_NVRAM=y -# CONFIG_R3964 is not set -# CONFIG_APPLICOM is not set - -# -# PCMCIA character devices -# -# CONFIG_SYNCLINK_CS is not set -# CONFIG_CARDMAN_4000 is not set -# CONFIG_CARDMAN_4040 is not set -# CONFIG_IPWIRELESS is not set -# CONFIG_MWAVE is not set -# CONFIG_PC8736x_GPIO is not set -# CONFIG_RAW_DRIVER is not set  CONFIG_HPET=y  # CONFIG_HPET_MMAP is not set -# 
CONFIG_HANGCHECK_TIMER is not set -# CONFIG_TCG_TPM is not set -# CONFIG_TELCLOCK is not set -CONFIG_DEVPORT=y -CONFIG_I2C=y -CONFIG_I2C_BOARDINFO=y -# CONFIG_I2C_CHARDEV is not set -CONFIG_I2C_HELPER_AUTO=y -CONFIG_I2C_ALGOBIT=y - -# -# I2C Hardware Bus support -# - -# -# PC SMBus host controller drivers -# -# CONFIG_I2C_ALI1535 is not set -# CONFIG_I2C_ALI1563 is not set -# CONFIG_I2C_ALI15X3 is not set -# CONFIG_I2C_AMD756 is not set -# CONFIG_I2C_AMD8111 is not set  CONFIG_I2C_I801=y -# CONFIG_I2C_ISCH is not set -# CONFIG_I2C_PIIX4 is not set -# CONFIG_I2C_NFORCE2 is not set -# CONFIG_I2C_SIS5595 is not set -# CONFIG_I2C_SIS630 is not set -# CONFIG_I2C_SIS96X is not set -# CONFIG_I2C_VIA is not set -# CONFIG_I2C_VIAPRO is not set - -# -# I2C system bus drivers (mostly embedded / system-on-chip) -# -# CONFIG_I2C_OCORES is not set -# CONFIG_I2C_SIMTEC is not set - -# -# External I2C/SMBus adapter drivers -# -# CONFIG_I2C_PARPORT_LIGHT is not set -# CONFIG_I2C_TAOS_EVM is not set -# CONFIG_I2C_TINY_USB is not set - -# -# Graphics adapter I2C/DDC channel drivers -# -# CONFIG_I2C_VOODOO3 is not set - -# -# Other I2C/SMBus bus drivers -# -# CONFIG_I2C_PCA_PLATFORM is not set -# CONFIG_I2C_STUB is not set - -# -# Miscellaneous I2C Chip support -# -# CONFIG_DS1682 is not set -# CONFIG_SENSORS_PCF8574 is not set -# CONFIG_PCF8575 is not set -# CONFIG_SENSORS_PCA9539 is not set -# CONFIG_SENSORS_MAX6875 is not set -# CONFIG_SENSORS_TSL2550 is not set -# CONFIG_I2C_DEBUG_CORE is not set -# CONFIG_I2C_DEBUG_ALGO is not set -# CONFIG_I2C_DEBUG_BUS is not set -# CONFIG_I2C_DEBUG_CHIP is not set -# CONFIG_SPI is not set -CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y -# CONFIG_GPIOLIB is not set -# CONFIG_W1 is not set -CONFIG_POWER_SUPPLY=y -# CONFIG_POWER_SUPPLY_DEBUG is not set -# CONFIG_PDA_POWER is not set -# CONFIG_BATTERY_DS2760 is not set -# CONFIG_BATTERY_BQ27x00 is not set -CONFIG_HWMON=y -# CONFIG_HWMON_VID is not set -# CONFIG_SENSORS_ABITUGURU is not set -# CONFIG_SENSORS_ABITUGURU3 is not set -# CONFIG_SENSORS_AD7414 is not set -# CONFIG_SENSORS_AD7418 is not set -# CONFIG_SENSORS_ADM1021 is not set -# CONFIG_SENSORS_ADM1025 is not set -# CONFIG_SENSORS_ADM1026 is not set -# CONFIG_SENSORS_ADM1029 is not set -# CONFIG_SENSORS_ADM1031 is not set -# CONFIG_SENSORS_ADM9240 is not set -# CONFIG_SENSORS_ADT7462 is not set -# CONFIG_SENSORS_ADT7470 is not set -# CONFIG_SENSORS_ADT7473 is not set -# CONFIG_SENSORS_ADT7475 is not set -# CONFIG_SENSORS_K8TEMP is not set -# CONFIG_SENSORS_ASB100 is not set -# CONFIG_SENSORS_ATK0110 is not set -# CONFIG_SENSORS_ATXP1 is not set -# CONFIG_SENSORS_DS1621 is not set -# CONFIG_SENSORS_I5K_AMB is not set -# CONFIG_SENSORS_F71805F is not set -# CONFIG_SENSORS_F71882FG is not set -# CONFIG_SENSORS_F75375S is not set -# CONFIG_SENSORS_FSCHER is not set -# CONFIG_SENSORS_FSCPOS is not set -# CONFIG_SENSORS_FSCHMD is not set -# CONFIG_SENSORS_G760A is not set -# CONFIG_SENSORS_GL518SM is not set -# CONFIG_SENSORS_GL520SM is not set -# CONFIG_SENSORS_CORETEMP is not set -# CONFIG_SENSORS_IT87 is not set -# CONFIG_SENSORS_LM63 is not set -# CONFIG_SENSORS_LM75 is not set -# CONFIG_SENSORS_LM77 is not set -# CONFIG_SENSORS_LM78 is not set -# CONFIG_SENSORS_LM80 is not set -# CONFIG_SENSORS_LM83 is not set -# CONFIG_SENSORS_LM85 is not set -# CONFIG_SENSORS_LM87 is not set -# CONFIG_SENSORS_LM90 is not set -# CONFIG_SENSORS_LM92 is not set -# CONFIG_SENSORS_LM93 is not set -# CONFIG_SENSORS_LTC4215 is not set -# CONFIG_SENSORS_LTC4245 is not set -# CONFIG_SENSORS_LM95241 
is not set -# CONFIG_SENSORS_MAX1619 is not set -# CONFIG_SENSORS_MAX6650 is not set -# CONFIG_SENSORS_PC87360 is not set -# CONFIG_SENSORS_PC87427 is not set -# CONFIG_SENSORS_PCF8591 is not set -# CONFIG_SENSORS_SIS5595 is not set -# CONFIG_SENSORS_DME1737 is not set -# CONFIG_SENSORS_SMSC47M1 is not set -# CONFIG_SENSORS_SMSC47M192 is not set -# CONFIG_SENSORS_SMSC47B397 is not set -# CONFIG_SENSORS_ADS7828 is not set -# CONFIG_SENSORS_THMC50 is not set -# CONFIG_SENSORS_VIA686A is not set -# CONFIG_SENSORS_VT1211 is not set -# CONFIG_SENSORS_VT8231 is not set -# CONFIG_SENSORS_W83781D is not set -# CONFIG_SENSORS_W83791D is not set -# CONFIG_SENSORS_W83792D is not set -# CONFIG_SENSORS_W83793 is not set -# CONFIG_SENSORS_W83L785TS is not set -# CONFIG_SENSORS_W83L786NG is not set -# CONFIG_SENSORS_W83627HF is not set -# CONFIG_SENSORS_W83627EHF is not set -# CONFIG_SENSORS_HDAPS is not set -# CONFIG_SENSORS_LIS3LV02D is not set -# CONFIG_SENSORS_APPLESMC is not set -# CONFIG_HWMON_DEBUG_CHIP is not set -CONFIG_THERMAL=y -# CONFIG_THERMAL_HWMON is not set  CONFIG_WATCHDOG=y -# CONFIG_WATCHDOG_NOWAYOUT is not set - -# -# Watchdog Device Drivers -# -# CONFIG_SOFT_WATCHDOG is not set -# CONFIG_ACQUIRE_WDT is not set -# CONFIG_ADVANTECH_WDT is not set -# CONFIG_ALIM1535_WDT is not set -# CONFIG_ALIM7101_WDT is not set -# CONFIG_SC520_WDT is not set -# CONFIG_EUROTECH_WDT is not set -# CONFIG_IB700_WDT is not set -# CONFIG_IBMASR is not set -# CONFIG_WAFER_WDT is not set -# CONFIG_I6300ESB_WDT is not set -# CONFIG_ITCO_WDT is not set -# CONFIG_IT8712F_WDT is not set -# CONFIG_IT87_WDT is not set -# CONFIG_HP_WATCHDOG is not set -# CONFIG_SC1200_WDT is not set -# CONFIG_PC87413_WDT is not set -# CONFIG_60XX_WDT is not set -# CONFIG_SBC8360_WDT is not set -# CONFIG_CPU5_WDT is not set -# CONFIG_SMSC_SCH311X_WDT is not set -# CONFIG_SMSC37B787_WDT is not set -# CONFIG_W83627HF_WDT is not set -# CONFIG_W83697HF_WDT is not set -# CONFIG_W83697UG_WDT is not set -# CONFIG_W83877F_WDT is not set -# CONFIG_W83977F_WDT is not set -# CONFIG_MACHZ_WDT is not set -# CONFIG_SBC_EPX_C3_WATCHDOG is not set - -# -# PCI-based Watchdog Cards -# -# CONFIG_PCIPCWATCHDOG is not set -# CONFIG_WDTPCI is not set - -# -# USB-based Watchdog Cards -# -# CONFIG_USBPCWATCHDOG is not set -CONFIG_SSB_POSSIBLE=y - -# -# Sonics Silicon Backplane -# -# CONFIG_SSB is not set - -# -# Multifunction device drivers -# -# CONFIG_MFD_CORE is not set -# CONFIG_MFD_SM501 is not set -# CONFIG_HTC_PASIC3 is not set -# CONFIG_TWL4030_CORE is not set -# CONFIG_MFD_TMIO is not set -# CONFIG_PMIC_DA903X is not set -# CONFIG_MFD_WM8400 is not set -# CONFIG_MFD_WM8350_I2C is not set -# CONFIG_MFD_PCF50633 is not set -# CONFIG_REGULATOR is not set - -# -# Multimedia devices -# - -# -# Multimedia core support -# -# CONFIG_VIDEO_DEV is not set -# CONFIG_DVB_CORE is not set -# CONFIG_VIDEO_MEDIA is not set - -# -# Multimedia drivers -# -CONFIG_DAB=y -# CONFIG_USB_DABUSB is not set - -# -# Graphics support -#  CONFIG_AGP=y  CONFIG_AGP_AMD64=y  CONFIG_AGP_INTEL=y -# CONFIG_AGP_SIS is not set -# CONFIG_AGP_VIA is not set  CONFIG_DRM=y -# CONFIG_DRM_TDFX is not set -# CONFIG_DRM_R128 is not set -# CONFIG_DRM_RADEON is not set -# CONFIG_DRM_I810 is not set -# CONFIG_DRM_I830 is not set  CONFIG_DRM_I915=y  CONFIG_DRM_I915_KMS=y -# CONFIG_DRM_MGA is not set -# CONFIG_DRM_SIS is not set -# CONFIG_DRM_VIA is not set -# CONFIG_DRM_SAVAGE is not set -# CONFIG_VGASTATE is not set -# CONFIG_VIDEO_OUTPUT_CONTROL is not set -CONFIG_FB=y -# CONFIG_FIRMWARE_EDID 
is not set -# CONFIG_FB_DDC is not set -# CONFIG_FB_BOOT_VESA_SUPPORT is not set -CONFIG_FB_CFB_FILLRECT=y -CONFIG_FB_CFB_COPYAREA=y -CONFIG_FB_CFB_IMAGEBLIT=y -# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set -# CONFIG_FB_SYS_FILLRECT is not set -# CONFIG_FB_SYS_COPYAREA is not set -# CONFIG_FB_SYS_IMAGEBLIT is not set -# CONFIG_FB_FOREIGN_ENDIAN is not set -# CONFIG_FB_SYS_FOPS is not set -# CONFIG_FB_SVGALIB is not set -# CONFIG_FB_MACMODES is not set -# CONFIG_FB_BACKLIGHT is not set  CONFIG_FB_MODE_HELPERS=y  CONFIG_FB_TILEBLITTING=y - -# -# Frame buffer hardware drivers -# -# CONFIG_FB_CIRRUS is not set -# CONFIG_FB_PM2 is not set -# CONFIG_FB_CYBER2000 is not set -# CONFIG_FB_ARC is not set -# CONFIG_FB_ASILIANT is not set -# CONFIG_FB_IMSTT is not set -# CONFIG_FB_VGA16 is not set -# CONFIG_FB_UVESA is not set -# CONFIG_FB_VESA is not set  CONFIG_FB_EFI=y -# CONFIG_FB_N411 is not set -# CONFIG_FB_HGA is not set -# CONFIG_FB_S1D13XXX is not set -# CONFIG_FB_NVIDIA is not set -# CONFIG_FB_RIVA is not set -# CONFIG_FB_LE80578 is not set -# CONFIG_FB_INTEL is not set -# CONFIG_FB_MATROX is not set -# CONFIG_FB_RADEON is not set -# CONFIG_FB_ATY128 is not set -# CONFIG_FB_ATY is not set -# CONFIG_FB_S3 is not set -# CONFIG_FB_SAVAGE is not set -# CONFIG_FB_SIS is not set -# CONFIG_FB_VIA is not set -# CONFIG_FB_NEOMAGIC is not set -# CONFIG_FB_KYRO is not set -# CONFIG_FB_3DFX is not set -# CONFIG_FB_VOODOO1 is not set -# CONFIG_FB_VT8623 is not set -# CONFIG_FB_TRIDENT is not set -# CONFIG_FB_ARK is not set -# CONFIG_FB_PM3 is not set -# CONFIG_FB_CARMINE is not set -# CONFIG_FB_GEODE is not set -# CONFIG_FB_VIRTUAL is not set -# CONFIG_FB_METRONOME is not set -# CONFIG_FB_MB862XX is not set -# CONFIG_FB_BROADSHEET is not set  CONFIG_BACKLIGHT_LCD_SUPPORT=y  # CONFIG_LCD_CLASS_DEVICE is not set -CONFIG_BACKLIGHT_CLASS_DEVICE=y -CONFIG_BACKLIGHT_GENERIC=y -# CONFIG_BACKLIGHT_PROGEAR is not set -# CONFIG_BACKLIGHT_MBP_NVIDIA is not set -# CONFIG_BACKLIGHT_SAHARA is not set - -# -# Display device support -# -# CONFIG_DISPLAY_SUPPORT is not set - -# -# Console display driver support -# -CONFIG_VGA_CONSOLE=y  CONFIG_VGACON_SOFT_SCROLLBACK=y -CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=64 -CONFIG_DUMMY_CONSOLE=y -# CONFIG_FRAMEBUFFER_CONSOLE is not set  CONFIG_LOGO=y  # CONFIG_LOGO_LINUX_MONO is not set  # CONFIG_LOGO_LINUX_VGA16 is not set -CONFIG_LOGO_LINUX_CLUT224=y  CONFIG_SOUND=y -CONFIG_SOUND_OSS_CORE=y  CONFIG_SND=y -CONFIG_SND_TIMER=y -CONFIG_SND_PCM=y -CONFIG_SND_HWDEP=y -CONFIG_SND_JACK=y  CONFIG_SND_SEQUENCER=y  CONFIG_SND_SEQ_DUMMY=y -CONFIG_SND_OSSEMUL=y  CONFIG_SND_MIXER_OSS=y  CONFIG_SND_PCM_OSS=y -CONFIG_SND_PCM_OSS_PLUGINS=y  CONFIG_SND_SEQUENCER_OSS=y  CONFIG_SND_HRTIMER=y -CONFIG_SND_SEQ_HRTIMER_DEFAULT=y -CONFIG_SND_DYNAMIC_MINORS=y -CONFIG_SND_SUPPORT_OLD_API=y -CONFIG_SND_VERBOSE_PROCFS=y -# CONFIG_SND_VERBOSE_PRINTK is not set -# CONFIG_SND_DEBUG is not set -CONFIG_SND_VMASTER=y -CONFIG_SND_DRIVERS=y -# CONFIG_SND_PCSP is not set -# CONFIG_SND_DUMMY is not set -# CONFIG_SND_VIRMIDI is not set -# CONFIG_SND_MTPAV is not set -# CONFIG_SND_SERIAL_U16550 is not set -# CONFIG_SND_MPU401 is not set -CONFIG_SND_PCI=y -# CONFIG_SND_AD1889 is not set -# CONFIG_SND_ALS300 is not set -# CONFIG_SND_ALS4000 is not set -# CONFIG_SND_ALI5451 is not set -# CONFIG_SND_ATIIXP is not set -# CONFIG_SND_ATIIXP_MODEM is not set -# CONFIG_SND_AU8810 is not set -# CONFIG_SND_AU8820 is not set -# CONFIG_SND_AU8830 is not set -# CONFIG_SND_AW2 is not set -# CONFIG_SND_AZT3328 is not set -# CONFIG_SND_BT87X 
is not set -# CONFIG_SND_CA0106 is not set -# CONFIG_SND_CMIPCI is not set -# CONFIG_SND_OXYGEN is not set -# CONFIG_SND_CS4281 is not set -# CONFIG_SND_CS46XX is not set -# CONFIG_SND_CS5530 is not set -# CONFIG_SND_DARLA20 is not set -# CONFIG_SND_GINA20 is not set -# CONFIG_SND_LAYLA20 is not set -# CONFIG_SND_DARLA24 is not set -# CONFIG_SND_GINA24 is not set -# CONFIG_SND_LAYLA24 is not set -# CONFIG_SND_MONA is not set -# CONFIG_SND_MIA is not set -# CONFIG_SND_ECHO3G is not set -# CONFIG_SND_INDIGO is not set -# CONFIG_SND_INDIGOIO is not set -# CONFIG_SND_INDIGODJ is not set -# CONFIG_SND_INDIGOIOX is not set -# CONFIG_SND_INDIGODJX is not set -# CONFIG_SND_EMU10K1 is not set -# CONFIG_SND_EMU10K1X is not set -# CONFIG_SND_ENS1370 is not set -# CONFIG_SND_ENS1371 is not set -# CONFIG_SND_ES1938 is not set -# CONFIG_SND_ES1968 is not set -# CONFIG_SND_FM801 is not set  CONFIG_SND_HDA_INTEL=y  CONFIG_SND_HDA_HWDEP=y -# CONFIG_SND_HDA_RECONFIG is not set -# CONFIG_SND_HDA_INPUT_BEEP is not set -CONFIG_SND_HDA_CODEC_REALTEK=y -CONFIG_SND_HDA_CODEC_ANALOG=y -CONFIG_SND_HDA_CODEC_SIGMATEL=y -CONFIG_SND_HDA_CODEC_VIA=y -CONFIG_SND_HDA_CODEC_ATIHDMI=y -CONFIG_SND_HDA_CODEC_NVHDMI=y -CONFIG_SND_HDA_CODEC_INTELHDMI=y -CONFIG_SND_HDA_ELD=y -CONFIG_SND_HDA_CODEC_CONEXANT=y -CONFIG_SND_HDA_CODEC_CMEDIA=y -CONFIG_SND_HDA_CODEC_SI3054=y -CONFIG_SND_HDA_GENERIC=y -# CONFIG_SND_HDA_POWER_SAVE is not set -# CONFIG_SND_HDSP is not set -# CONFIG_SND_HDSPM is not set -# CONFIG_SND_HIFIER is not set -# CONFIG_SND_ICE1712 is not set -# CONFIG_SND_ICE1724 is not set -# CONFIG_SND_INTEL8X0 is not set -# CONFIG_SND_INTEL8X0M is not set -# CONFIG_SND_KORG1212 is not set -# CONFIG_SND_MAESTRO3 is not set -# CONFIG_SND_MIXART is not set -# CONFIG_SND_NM256 is not set -# CONFIG_SND_PCXHR is not set -# CONFIG_SND_RIPTIDE is not set -# CONFIG_SND_RME32 is not set -# CONFIG_SND_RME96 is not set -# CONFIG_SND_RME9652 is not set -# CONFIG_SND_SONICVIBES is not set -# CONFIG_SND_TRIDENT is not set -# CONFIG_SND_VIA82XX is not set -# CONFIG_SND_VIA82XX_MODEM is not set -# CONFIG_SND_VIRTUOSO is not set -# CONFIG_SND_VX222 is not set -# CONFIG_SND_YMFPCI is not set -CONFIG_SND_USB=y -# CONFIG_SND_USB_AUDIO is not set -# CONFIG_SND_USB_USX2Y is not set -# CONFIG_SND_USB_CAIAQ is not set -# CONFIG_SND_USB_US122L is not set -CONFIG_SND_PCMCIA=y -# CONFIG_SND_VXPOCKET is not set -# CONFIG_SND_PDAUDIOCF is not set -# CONFIG_SND_SOC is not set -# CONFIG_SOUND_PRIME is not set -CONFIG_HID_SUPPORT=y -CONFIG_HID=y -CONFIG_HID_DEBUG=y  CONFIG_HIDRAW=y - -# -# USB Input Devices -# -CONFIG_USB_HID=y  CONFIG_HID_PID=y  CONFIG_USB_HIDDEV=y - -# -# Special HID drivers -# -CONFIG_HID_A4TECH=y -CONFIG_HID_APPLE=y -CONFIG_HID_BELKIN=y -CONFIG_HID_CHERRY=y -CONFIG_HID_CHICONY=y -CONFIG_HID_CYPRESS=y -# CONFIG_DRAGONRISE_FF is not set -CONFIG_HID_EZKEY=y -CONFIG_HID_KYE=y  CONFIG_HID_GYRATION=y -CONFIG_HID_KENSINGTON=y -CONFIG_HID_LOGITECH=y  CONFIG_LOGITECH_FF=y -# CONFIG_LOGIRUMBLEPAD2_FF is not set -CONFIG_HID_MICROSOFT=y -CONFIG_HID_MONTEREY=y  CONFIG_HID_NTRIG=y  CONFIG_HID_PANTHERLORD=y  CONFIG_PANTHERLORD_FF=y @@ -1849,697 +250,90 @@ CONFIG_HID_PETALYNX=y  CONFIG_HID_SAMSUNG=y  CONFIG_HID_SONY=y  CONFIG_HID_SUNPLUS=y -# CONFIG_GREENASIA_FF is not set  CONFIG_HID_TOPSEED=y -CONFIG_THRUSTMASTER_FF=y -CONFIG_ZEROPLUS_FF=y -CONFIG_USB_SUPPORT=y -CONFIG_USB_ARCH_HAS_HCD=y -CONFIG_USB_ARCH_HAS_OHCI=y -CONFIG_USB_ARCH_HAS_EHCI=y  CONFIG_USB=y  CONFIG_USB_DEBUG=y  CONFIG_USB_ANNOUNCE_NEW_DEVICES=y - -# -# Miscellaneous USB options -#  
CONFIG_USB_DEVICEFS=y  # CONFIG_USB_DEVICE_CLASS is not set -# CONFIG_USB_DYNAMIC_MINORS is not set -CONFIG_USB_SUSPEND=y -# CONFIG_USB_OTG is not set  CONFIG_USB_MON=y -# CONFIG_USB_WUSB is not set -# CONFIG_USB_WUSB_CBAF is not set - -# -# USB Host Controller Drivers -# -# CONFIG_USB_C67X00_HCD is not set  CONFIG_USB_EHCI_HCD=y -# CONFIG_USB_EHCI_ROOT_HUB_TT is not set  # CONFIG_USB_EHCI_TT_NEWSCHED is not set -# CONFIG_USB_OXU210HP_HCD is not set -# CONFIG_USB_ISP116X_HCD is not set -# CONFIG_USB_ISP1760_HCD is not set  CONFIG_USB_OHCI_HCD=y -# CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set -# CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set -CONFIG_USB_OHCI_LITTLE_ENDIAN=y  CONFIG_USB_UHCI_HCD=y -# CONFIG_USB_SL811_HCD is not set -# CONFIG_USB_R8A66597_HCD is not set -# CONFIG_USB_WHCI_HCD is not set -# CONFIG_USB_HWA_HCD is not set - -# -# USB Device Class drivers -# -# CONFIG_USB_ACM is not set  CONFIG_USB_PRINTER=y -# CONFIG_USB_WDM is not set -# CONFIG_USB_TMC is not set - -# -# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may -# - -# -# also be needed; see USB_STORAGE Help for more info -#  CONFIG_USB_STORAGE=y -# CONFIG_USB_STORAGE_DEBUG is not set -# CONFIG_USB_STORAGE_DATAFAB is not set -# CONFIG_USB_STORAGE_FREECOM is not set -# CONFIG_USB_STORAGE_ISD200 is not set -# CONFIG_USB_STORAGE_USBAT is not set -# CONFIG_USB_STORAGE_SDDR09 is not set -# CONFIG_USB_STORAGE_SDDR55 is not set -# CONFIG_USB_STORAGE_JUMPSHOT is not set -# CONFIG_USB_STORAGE_ALAUDA is not set -# CONFIG_USB_STORAGE_ONETOUCH is not set -# CONFIG_USB_STORAGE_KARMA is not set -# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set  CONFIG_USB_LIBUSUAL=y - -# -# USB Imaging devices -# -# CONFIG_USB_MDC800 is not set -# CONFIG_USB_MICROTEK is not set - -# -# USB port drivers -# -# CONFIG_USB_SERIAL is not set - -# -# USB Miscellaneous drivers -# -# CONFIG_USB_EMI62 is not set -# CONFIG_USB_EMI26 is not set -# CONFIG_USB_ADUTUX is not set -# CONFIG_USB_SEVSEG is not set -# CONFIG_USB_RIO500 is not set -# CONFIG_USB_LEGOTOWER is not set -# CONFIG_USB_LCD is not set -# CONFIG_USB_BERRY_CHARGE is not set -# CONFIG_USB_LED is not set -# CONFIG_USB_CYPRESS_CY7C63 is not set -# CONFIG_USB_CYTHERM is not set -# CONFIG_USB_IDMOUSE is not set -# CONFIG_USB_FTDI_ELAN is not set -# CONFIG_USB_APPLEDISPLAY is not set -# CONFIG_USB_SISUSBVGA is not set -# CONFIG_USB_LD is not set -# CONFIG_USB_TRANCEVIBRATOR is not set -# CONFIG_USB_IOWARRIOR is not set -# CONFIG_USB_TEST is not set -# CONFIG_USB_ISIGHTFW is not set -# CONFIG_USB_VST is not set -# CONFIG_USB_GADGET is not set - -# -# OTG and related infrastructure -# -# CONFIG_NOP_USB_XCEIV is not set -# CONFIG_UWB is not set -# CONFIG_MMC is not set -# CONFIG_MEMSTICK is not set -CONFIG_NEW_LEDS=y -CONFIG_LEDS_CLASS=y - -# -# LED drivers -# -# CONFIG_LEDS_ALIX2 is not set -# CONFIG_LEDS_PCA9532 is not set -# CONFIG_LEDS_LP5521 is not set -# CONFIG_LEDS_CLEVO_MAIL is not set -# CONFIG_LEDS_PCA955X is not set -# CONFIG_LEDS_BD2802 is not set - -# -# LED Triggers -# -CONFIG_LEDS_TRIGGERS=y -# CONFIG_LEDS_TRIGGER_TIMER is not set -# CONFIG_LEDS_TRIGGER_HEARTBEAT is not set -# CONFIG_LEDS_TRIGGER_BACKLIGHT is not set -# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set - -# -# iptables trigger is under Netfilter config (LED target) -# -# CONFIG_ACCESSIBILITY is not set -# CONFIG_INFINIBAND is not set  CONFIG_EDAC=y - -# -# Reporting subsystems -# -# CONFIG_EDAC_DEBUG is not set -# CONFIG_EDAC_MM_EDAC is not set -CONFIG_RTC_LIB=y  CONFIG_RTC_CLASS=y  # CONFIG_RTC_HCTOSYS is not set -# CONFIG_RTC_DEBUG is 
not set - -# -# RTC interfaces -# -CONFIG_RTC_INTF_SYSFS=y -CONFIG_RTC_INTF_PROC=y -CONFIG_RTC_INTF_DEV=y -# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set -# CONFIG_RTC_DRV_TEST is not set - -# -# I2C RTC drivers -# -# CONFIG_RTC_DRV_DS1307 is not set -# CONFIG_RTC_DRV_DS1374 is not set -# CONFIG_RTC_DRV_DS1672 is not set -# CONFIG_RTC_DRV_MAX6900 is not set -# CONFIG_RTC_DRV_RS5C372 is not set -# CONFIG_RTC_DRV_ISL1208 is not set -# CONFIG_RTC_DRV_X1205 is not set -# CONFIG_RTC_DRV_PCF8563 is not set -# CONFIG_RTC_DRV_PCF8583 is not set -# CONFIG_RTC_DRV_M41T80 is not set -# CONFIG_RTC_DRV_S35390A is not set -# CONFIG_RTC_DRV_FM3130 is not set -# CONFIG_RTC_DRV_RX8581 is not set - -# -# SPI RTC drivers -# - -# -# Platform RTC drivers -# -CONFIG_RTC_DRV_CMOS=y -# CONFIG_RTC_DRV_DS1286 is not set -# CONFIG_RTC_DRV_DS1511 is not set -# CONFIG_RTC_DRV_DS1553 is not set -# CONFIG_RTC_DRV_DS1742 is not set -# CONFIG_RTC_DRV_STK17TA8 is not set -# CONFIG_RTC_DRV_M48T86 is not set -# CONFIG_RTC_DRV_M48T35 is not set -# CONFIG_RTC_DRV_M48T59 is not set -# CONFIG_RTC_DRV_BQ4802 is not set -# CONFIG_RTC_DRV_V3020 is not set - -# -# on-CPU RTC drivers -#  CONFIG_DMADEVICES=y - -# -# DMA Devices -# -# CONFIG_INTEL_IOATDMA is not set -# CONFIG_AUXDISPLAY is not set -# CONFIG_UIO is not set -# CONFIG_STAGING is not set -CONFIG_X86_PLATFORM_DEVICES=y -# CONFIG_ACER_WMI is not set -# CONFIG_ASUS_LAPTOP is not set -# CONFIG_FUJITSU_LAPTOP is not set -# CONFIG_MSI_LAPTOP is not set -# CONFIG_PANASONIC_LAPTOP is not set -# CONFIG_COMPAL_LAPTOP is not set -# CONFIG_SONY_LAPTOP is not set -# CONFIG_THINKPAD_ACPI is not set -# CONFIG_INTEL_MENLOW is not set  CONFIG_EEEPC_LAPTOP=y -# CONFIG_ACPI_WMI is not set -# CONFIG_ACPI_ASUS is not set -# CONFIG_ACPI_TOSHIBA is not set - -# -# Firmware Drivers -# -# CONFIG_EDD is not set -CONFIG_FIRMWARE_MEMMAP=y  CONFIG_EFI_VARS=y -# CONFIG_DELL_RBU is not set -# CONFIG_DCDBAS is not set -CONFIG_DMIID=y -# CONFIG_ISCSI_IBFT_FIND is not set - -# -# File systems -# -# CONFIG_EXT2_FS is not set  CONFIG_EXT3_FS=y  # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_XATTR=y  CONFIG_EXT3_FS_POSIX_ACL=y  CONFIG_EXT3_FS_SECURITY=y -# CONFIG_EXT4_FS is not set -CONFIG_JBD=y -# CONFIG_JBD_DEBUG is not set -CONFIG_FS_MBCACHE=y -# CONFIG_REISERFS_FS is not set -# CONFIG_JFS_FS is not set -CONFIG_FS_POSIX_ACL=y -CONFIG_FILE_LOCKING=y -# CONFIG_XFS_FS is not set -# CONFIG_GFS2_FS is not set -# CONFIG_OCFS2_FS is not set -# CONFIG_BTRFS_FS is not set -CONFIG_DNOTIFY=y -CONFIG_INOTIFY=y -CONFIG_INOTIFY_USER=y  CONFIG_QUOTA=y  CONFIG_QUOTA_NETLINK_INTERFACE=y  # CONFIG_PRINT_QUOTA_WARNING is not set -CONFIG_QUOTA_TREE=y -# CONFIG_QFMT_V1 is not set  CONFIG_QFMT_V2=y -CONFIG_QUOTACTL=y -# CONFIG_AUTOFS_FS is not set  CONFIG_AUTOFS4_FS=y -# CONFIG_FUSE_FS is not set -CONFIG_GENERIC_ACL=y - -# -# Caches -# -# CONFIG_FSCACHE is not set - -# -# CD-ROM/DVD Filesystems -#  CONFIG_ISO9660_FS=y  CONFIG_JOLIET=y  CONFIG_ZISOFS=y -# CONFIG_UDF_FS is not set - -# -# DOS/FAT/NT Filesystems -# -CONFIG_FAT_FS=y  CONFIG_MSDOS_FS=y  CONFIG_VFAT_FS=y -CONFIG_FAT_DEFAULT_CODEPAGE=437 -CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" -# CONFIG_NTFS_FS is not set - -# -# Pseudo filesystems -# -CONFIG_PROC_FS=y  CONFIG_PROC_KCORE=y -CONFIG_PROC_VMCORE=y -CONFIG_PROC_SYSCTL=y -CONFIG_PROC_PAGE_MONITOR=y -CONFIG_SYSFS=y  CONFIG_TMPFS=y  CONFIG_TMPFS_POSIX_ACL=y  CONFIG_HUGETLBFS=y -CONFIG_HUGETLB_PAGE=y -# CONFIG_CONFIGFS_FS is not set -CONFIG_MISC_FILESYSTEMS=y -# CONFIG_ADFS_FS is not set -# CONFIG_AFFS_FS is not 
set -# CONFIG_ECRYPT_FS is not set -# CONFIG_HFS_FS is not set -# CONFIG_HFSPLUS_FS is not set -# CONFIG_BEFS_FS is not set -# CONFIG_BFS_FS is not set -# CONFIG_EFS_FS is not set -# CONFIG_CRAMFS is not set -# CONFIG_SQUASHFS is not set -# CONFIG_VXFS_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_OMFS_FS is not set -# CONFIG_HPFS_FS is not set -# CONFIG_QNX4FS_FS is not set -# CONFIG_ROMFS_FS is not set -# CONFIG_SYSV_FS is not set -# CONFIG_UFS_FS is not set -# CONFIG_NILFS2_FS is not set -CONFIG_NETWORK_FILESYSTEMS=y  CONFIG_NFS_FS=y  CONFIG_NFS_V3=y  CONFIG_NFS_V3_ACL=y  CONFIG_NFS_V4=y  CONFIG_ROOT_NFS=y -# CONFIG_NFSD is not set -CONFIG_LOCKD=y -CONFIG_LOCKD_V4=y -CONFIG_NFS_ACL_SUPPORT=y -CONFIG_NFS_COMMON=y -CONFIG_SUNRPC=y -CONFIG_SUNRPC_GSS=y -CONFIG_RPCSEC_GSS_KRB5=y -# CONFIG_RPCSEC_GSS_SPKM3 is not set -# CONFIG_SMB_FS is not set -# CONFIG_CIFS is not set -# CONFIG_NCP_FS is not set -# CONFIG_CODA_FS is not set -# CONFIG_AFS_FS is not set - -# -# Partition Types -#  CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set  CONFIG_OSF_PARTITION=y  CONFIG_AMIGA_PARTITION=y -# CONFIG_ATARI_PARTITION is not set  CONFIG_MAC_PARTITION=y -CONFIG_MSDOS_PARTITION=y  CONFIG_BSD_DISKLABEL=y  CONFIG_MINIX_SUBPARTITION=y  CONFIG_SOLARIS_X86_PARTITION=y  CONFIG_UNIXWARE_DISKLABEL=y -# CONFIG_LDM_PARTITION is not set  CONFIG_SGI_PARTITION=y -# CONFIG_ULTRIX_PARTITION is not set  CONFIG_SUN_PARTITION=y  CONFIG_KARMA_PARTITION=y  CONFIG_EFI_PARTITION=y -# CONFIG_SYSV68_PARTITION is not set -CONFIG_NLS=y  CONFIG_NLS_DEFAULT="utf8"  CONFIG_NLS_CODEPAGE_437=y -# CONFIG_NLS_CODEPAGE_737 is not set -# CONFIG_NLS_CODEPAGE_775 is not set -# CONFIG_NLS_CODEPAGE_850 is not set -# CONFIG_NLS_CODEPAGE_852 is not set -# CONFIG_NLS_CODEPAGE_855 is not set -# CONFIG_NLS_CODEPAGE_857 is not set -# CONFIG_NLS_CODEPAGE_860 is not set -# CONFIG_NLS_CODEPAGE_861 is not set -# CONFIG_NLS_CODEPAGE_862 is not set -# CONFIG_NLS_CODEPAGE_863 is not set -# CONFIG_NLS_CODEPAGE_864 is not set -# CONFIG_NLS_CODEPAGE_865 is not set -# CONFIG_NLS_CODEPAGE_866 is not set -# CONFIG_NLS_CODEPAGE_869 is not set -# CONFIG_NLS_CODEPAGE_936 is not set -# CONFIG_NLS_CODEPAGE_950 is not set -# CONFIG_NLS_CODEPAGE_932 is not set -# CONFIG_NLS_CODEPAGE_949 is not set -# CONFIG_NLS_CODEPAGE_874 is not set -# CONFIG_NLS_ISO8859_8 is not set -# CONFIG_NLS_CODEPAGE_1250 is not set -# CONFIG_NLS_CODEPAGE_1251 is not set  CONFIG_NLS_ASCII=y  CONFIG_NLS_ISO8859_1=y -# CONFIG_NLS_ISO8859_2 is not set -# CONFIG_NLS_ISO8859_3 is not set -# CONFIG_NLS_ISO8859_4 is not set -# CONFIG_NLS_ISO8859_5 is not set -# CONFIG_NLS_ISO8859_6 is not set -# CONFIG_NLS_ISO8859_7 is not set -# CONFIG_NLS_ISO8859_9 is not set -# CONFIG_NLS_ISO8859_13 is not set -# CONFIG_NLS_ISO8859_14 is not set -# CONFIG_NLS_ISO8859_15 is not set -# CONFIG_NLS_KOI8_R is not set -# CONFIG_NLS_KOI8_U is not set  CONFIG_NLS_UTF8=y -# CONFIG_DLM is not set - -# -# Kernel hacking -# -CONFIG_TRACE_IRQFLAGS_SUPPORT=y  CONFIG_PRINTK_TIME=y  # CONFIG_ENABLE_WARN_DEPRECATED is not set -CONFIG_ENABLE_MUST_CHECK=y -CONFIG_FRAME_WARN=2048  CONFIG_MAGIC_SYSRQ=y  # CONFIG_UNUSED_SYMBOLS is not set -CONFIG_DEBUG_FS=y -# CONFIG_HEADERS_CHECK is not set  CONFIG_DEBUG_KERNEL=y -# CONFIG_DEBUG_SHIRQ is not set -# CONFIG_DETECT_SOFTLOCKUP is not set -# CONFIG_DETECT_HUNG_TASK is not set  # CONFIG_SCHED_DEBUG is not set  CONFIG_SCHEDSTATS=y  CONFIG_TIMER_STATS=y -# CONFIG_DEBUG_OBJECTS is not set -# CONFIG_SLUB_DEBUG_ON is not set -# CONFIG_SLUB_STATS is not set -# 
CONFIG_DEBUG_RT_MUTEXES is not set -# CONFIG_RT_MUTEX_TESTER is not set -# CONFIG_DEBUG_SPINLOCK is not set -# CONFIG_DEBUG_MUTEXES is not set -# CONFIG_DEBUG_LOCK_ALLOC is not set -# CONFIG_PROVE_LOCKING is not set -# CONFIG_LOCK_STAT is not set -# CONFIG_DEBUG_SPINLOCK_SLEEP is not set -# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set -CONFIG_STACKTRACE=y -# CONFIG_DEBUG_KOBJECT is not set -CONFIG_DEBUG_BUGVERBOSE=y -# CONFIG_DEBUG_INFO is not set -# CONFIG_DEBUG_VM is not set -# CONFIG_DEBUG_VIRTUAL is not set -# CONFIG_DEBUG_WRITECOUNT is not set -CONFIG_DEBUG_MEMORY_INIT=y -# CONFIG_DEBUG_LIST is not set -# CONFIG_DEBUG_SG is not set -# CONFIG_DEBUG_NOTIFIERS is not set -CONFIG_ARCH_WANT_FRAME_POINTERS=y -CONFIG_FRAME_POINTER=y -# CONFIG_BOOT_PRINTK_DELAY is not set -# CONFIG_RCU_TORTURE_TEST is not set  # CONFIG_RCU_CPU_STALL_DETECTOR is not set -# CONFIG_KPROBES_SANITY_TEST is not set -# CONFIG_BACKTRACE_SELF_TEST is not set -# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set -# CONFIG_LKDTM is not set -# CONFIG_FAULT_INJECTION is not set -# CONFIG_LATENCYTOP is not set  CONFIG_SYSCTL_SYSCALL_CHECK=y -# CONFIG_DEBUG_PAGEALLOC is not set -CONFIG_USER_STACKTRACE_SUPPORT=y -CONFIG_NOP_TRACER=y -CONFIG_HAVE_FUNCTION_TRACER=y -CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y -CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y -CONFIG_HAVE_DYNAMIC_FTRACE=y -CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y -CONFIG_HAVE_HW_BRANCH_TRACER=y -CONFIG_HAVE_SYSCALL_TRACEPOINTS=y -CONFIG_RING_BUFFER=y -CONFIG_TRACING=y -CONFIG_TRACING_SUPPORT=y - -# -# Tracers -# -# CONFIG_FUNCTION_TRACER is not set -# CONFIG_IRQSOFF_TRACER is not set -# CONFIG_SYSPROF_TRACER is not set -# CONFIG_SCHED_TRACER is not set -# CONFIG_CONTEXT_SWITCH_TRACER is not set -# CONFIG_EVENT_TRACER is not set -# CONFIG_FTRACE_SYSCALLS is not set -# CONFIG_BOOT_TRACER is not set -# CONFIG_TRACE_BRANCH_PROFILING is not set -# CONFIG_POWER_TRACER is not set -# CONFIG_STACK_TRACER is not set -# CONFIG_HW_BRANCH_TRACER is not set -# CONFIG_KMEMTRACE is not set -# CONFIG_WORKQUEUE_TRACER is not set  CONFIG_BLK_DEV_IO_TRACE=y -# CONFIG_FTRACE_STARTUP_TEST is not set -# CONFIG_MMIOTRACE is not set  CONFIG_PROVIDE_OHCI1394_DMA_INIT=y -# CONFIG_DYNAMIC_DEBUG is not set -# CONFIG_DMA_API_DEBUG is not set -# CONFIG_SAMPLES is not set -CONFIG_HAVE_ARCH_KGDB=y -# CONFIG_KGDB is not set -# CONFIG_STRICT_DEVMEM is not set -CONFIG_X86_VERBOSE_BOOTUP=y -CONFIG_EARLY_PRINTK=y  CONFIG_EARLY_PRINTK_DBGP=y  CONFIG_DEBUG_STACKOVERFLOW=y  CONFIG_DEBUG_STACK_USAGE=y -# CONFIG_DEBUG_PER_CPU_MAPS is not set -# CONFIG_X86_PTDUMP is not set -CONFIG_DEBUG_RODATA=y  # CONFIG_DEBUG_RODATA_TEST is not set  CONFIG_DEBUG_NX_TEST=m -# CONFIG_IOMMU_DEBUG is not set -CONFIG_HAVE_MMIOTRACE_SUPPORT=y -CONFIG_IO_DELAY_TYPE_0X80=0 -CONFIG_IO_DELAY_TYPE_0XED=1 -CONFIG_IO_DELAY_TYPE_UDELAY=2 -CONFIG_IO_DELAY_TYPE_NONE=3 -CONFIG_IO_DELAY_0X80=y -# CONFIG_IO_DELAY_0XED is not set -# CONFIG_IO_DELAY_UDELAY is not set -# CONFIG_IO_DELAY_NONE is not set -CONFIG_DEFAULT_IO_DELAY_TYPE=0  CONFIG_DEBUG_BOOT_PARAMS=y -# CONFIG_CPA_DEBUG is not set  CONFIG_OPTIMIZE_INLINING=y - -# -# Security options -# -CONFIG_KEYS=y  CONFIG_KEYS_DEBUG_PROC_KEYS=y  CONFIG_SECURITY=y -# CONFIG_SECURITYFS is not set  CONFIG_SECURITY_NETWORK=y -# CONFIG_SECURITY_NETWORK_XFRM is not set -# CONFIG_SECURITY_PATH is not set -CONFIG_SECURITY_FILE_CAPABILITIES=y -# CONFIG_SECURITY_ROOTPLUG is not set -CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=65536  CONFIG_SECURITY_SELINUX=y  CONFIG_SECURITY_SELINUX_BOOTPARAM=y -CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=1  
CONFIG_SECURITY_SELINUX_DISABLE=y -CONFIG_SECURITY_SELINUX_DEVELOP=y -CONFIG_SECURITY_SELINUX_AVC_STATS=y -CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 -# CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set -# CONFIG_SECURITY_SMACK is not set -# CONFIG_SECURITY_TOMOYO is not set -# CONFIG_IMA is not set -CONFIG_CRYPTO=y - -# -# Crypto core or helper -# -# CONFIG_CRYPTO_FIPS is not set -CONFIG_CRYPTO_ALGAPI=y -CONFIG_CRYPTO_ALGAPI2=y -CONFIG_CRYPTO_AEAD=y -CONFIG_CRYPTO_AEAD2=y -CONFIG_CRYPTO_BLKCIPHER=y -CONFIG_CRYPTO_BLKCIPHER2=y -CONFIG_CRYPTO_HASH=y -CONFIG_CRYPTO_HASH2=y -CONFIG_CRYPTO_RNG2=y -CONFIG_CRYPTO_PCOMP=y -CONFIG_CRYPTO_MANAGER=y -CONFIG_CRYPTO_MANAGER2=y -# CONFIG_CRYPTO_GF128MUL is not set -# CONFIG_CRYPTO_NULL is not set -CONFIG_CRYPTO_WORKQUEUE=y -# CONFIG_CRYPTO_CRYPTD is not set -CONFIG_CRYPTO_AUTHENC=y -# CONFIG_CRYPTO_TEST is not set - -# -# Authenticated Encryption with Associated Data -# -# CONFIG_CRYPTO_CCM is not set -# CONFIG_CRYPTO_GCM is not set -# CONFIG_CRYPTO_SEQIV is not set - -# -# Block modes -# -CONFIG_CRYPTO_CBC=y -# CONFIG_CRYPTO_CTR is not set -# CONFIG_CRYPTO_CTS is not set -CONFIG_CRYPTO_ECB=y -# CONFIG_CRYPTO_LRW is not set -# CONFIG_CRYPTO_PCBC is not set -# CONFIG_CRYPTO_XTS is not set - -# -# Hash modes -# -CONFIG_CRYPTO_HMAC=y -# CONFIG_CRYPTO_XCBC is not set - -# -# Digest -# -# CONFIG_CRYPTO_CRC32C is not set -# CONFIG_CRYPTO_CRC32C_INTEL is not set -# CONFIG_CRYPTO_MD4 is not set -CONFIG_CRYPTO_MD5=y -# CONFIG_CRYPTO_MICHAEL_MIC is not set -# CONFIG_CRYPTO_RMD128 is not set -# CONFIG_CRYPTO_RMD160 is not set -# CONFIG_CRYPTO_RMD256 is not set -# CONFIG_CRYPTO_RMD320 is not set -CONFIG_CRYPTO_SHA1=y -# CONFIG_CRYPTO_SHA256 is not set -# CONFIG_CRYPTO_SHA512 is not set -# CONFIG_CRYPTO_TGR192 is not set -# CONFIG_CRYPTO_WP512 is not set - -# -# Ciphers -# -CONFIG_CRYPTO_AES=y -# CONFIG_CRYPTO_AES_X86_64 is not set -# CONFIG_CRYPTO_AES_NI_INTEL is not set -# CONFIG_CRYPTO_ANUBIS is not set -CONFIG_CRYPTO_ARC4=y -# CONFIG_CRYPTO_BLOWFISH is not set -# CONFIG_CRYPTO_CAMELLIA is not set -# CONFIG_CRYPTO_CAST5 is not set -# CONFIG_CRYPTO_CAST6 is not set -CONFIG_CRYPTO_DES=y -# CONFIG_CRYPTO_FCRYPT is not set -# CONFIG_CRYPTO_KHAZAD is not set -# CONFIG_CRYPTO_SALSA20 is not set -# CONFIG_CRYPTO_SALSA20_X86_64 is not set -# CONFIG_CRYPTO_SEED is not set -# CONFIG_CRYPTO_SERPENT is not set -# CONFIG_CRYPTO_TEA is not set -# CONFIG_CRYPTO_TWOFISH is not set -# CONFIG_CRYPTO_TWOFISH_X86_64 is not set - -# -# Compression -# -# CONFIG_CRYPTO_DEFLATE is not set -# CONFIG_CRYPTO_ZLIB is not set -# CONFIG_CRYPTO_LZO is not set - -# -# Random Number Generation -#  # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRYPTO_HW=y -# CONFIG_CRYPTO_DEV_HIFN_795X is not set -CONFIG_HAVE_KVM=y -CONFIG_HAVE_KVM_IRQCHIP=y -CONFIG_VIRTUALIZATION=y -# CONFIG_KVM is not set -# CONFIG_VIRTIO_PCI is not set -# CONFIG_VIRTIO_BALLOON is not set -CONFIG_BINARY_PRINTF=y - -# -# Library routines -# -CONFIG_BITREVERSE=y -CONFIG_GENERIC_FIND_FIRST_BIT=y -CONFIG_GENERIC_FIND_NEXT_BIT=y -CONFIG_GENERIC_FIND_LAST_BIT=y -# CONFIG_CRC_CCITT is not set -# CONFIG_CRC16 is not set  CONFIG_CRC_T10DIF=y -# CONFIG_CRC_ITU_T is not set -CONFIG_CRC32=y -# CONFIG_CRC7 is not set -# CONFIG_LIBCRC32C is not set -CONFIG_ZLIB_INFLATE=y -CONFIG_DECOMPRESS_GZIP=y -CONFIG_DECOMPRESS_BZIP2=y -CONFIG_DECOMPRESS_LZMA=y -CONFIG_HAS_IOMEM=y -CONFIG_HAS_IOPORT=y -CONFIG_HAS_DMA=y -CONFIG_NLATTR=y diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index e790bc1fbfa3..b86feabed69b 100644 --- 
a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -842,4 +842,7 @@ ia32_sys_call_table:  	.quad compat_sys_rt_tgsigqueueinfo	/* 335 */  	.quad sys_perf_event_open  	.quad compat_sys_recvmmsg +	.quad sys_fanotify_init +	.quad sys32_fanotify_mark +	.quad sys_prlimit64		/* 340 */  ia32_syscall_end: diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 626be156d88d..849813f398e7 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -51,7 +51,7 @@  #define AA(__x)		((unsigned long)(__x)) -asmlinkage long sys32_truncate64(char __user *filename, +asmlinkage long sys32_truncate64(const char __user *filename,  				 unsigned long offset_low,  				 unsigned long offset_high)  { @@ -96,7 +96,7 @@ static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat)  	return 0;  } -asmlinkage long sys32_stat64(char __user *filename, +asmlinkage long sys32_stat64(const char __user *filename,  			     struct stat64 __user *statbuf)  {  	struct kstat stat; @@ -107,7 +107,7 @@ asmlinkage long sys32_stat64(char __user *filename,  	return ret;  } -asmlinkage long sys32_lstat64(char __user *filename, +asmlinkage long sys32_lstat64(const char __user *filename,  			      struct stat64 __user *statbuf)  {  	struct kstat stat; @@ -126,7 +126,7 @@ asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf)  	return ret;  } -asmlinkage long sys32_fstatat(unsigned int dfd, char __user *filename, +asmlinkage long sys32_fstatat(unsigned int dfd, const char __user *filename,  			      struct stat64 __user *statbuf, int flag)  {  	struct kstat stat; @@ -408,8 +408,8 @@ asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count,  			 ((loff_t)AA(poshi) << 32) | AA(poslo));  } -asmlinkage long sys32_pwrite(unsigned int fd, char __user *ubuf, u32 count, -			     u32 poslo, u32 poshi) +asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf, +			     u32 count, u32 poslo, u32 poshi)  {  	return sys_pwrite64(fd, ubuf, count,  			  ((loff_t)AA(poshi) << 32) | AA(poslo)); @@ -449,7 +449,7 @@ asmlinkage long sys32_sendfile(int out_fd, int in_fd,  	return ret;  } -asmlinkage long sys32_execve(char __user *name, compat_uptr_t __user *argv, +asmlinkage long sys32_execve(const char __user *name, compat_uptr_t __user *argv,  			     compat_uptr_t __user *envp, struct pt_regs *regs)  {  	long error; @@ -546,3 +546,12 @@ asmlinkage long sys32_fallocate(int fd, int mode, unsigned offset_lo,  	return sys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo,  			     ((u64)len_hi << 32) | len_lo);  } + +asmlinkage long sys32_fanotify_mark(int fanotify_fd, unsigned int flags, +				    u32 mask_lo, u32 mask_hi, +				    int fd, const char  __user *pathname) +{ +	return sys_fanotify_mark(fanotify_fd, flags, +				 ((u64)mask_hi << 32) | mask_lo, +				 fd, pathname); +} diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 493092efaa3b..6fa90a845e4c 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -3,24 +3,23 @@ include include/asm-generic/Kbuild.asm  header-y += boot.h  header-y += bootparam.h  header-y += debugreg.h +header-y += e820.h +header-y += hw_breakpoint.h +header-y += hyperv.h +header-y += ist.h  header-y += ldt.h +header-y += mce.h  header-y += msr-index.h +header-y += msr.h +header-y += mtrr.h +header-y += posix_types_32.h +header-y += posix_types_64.h  header-y += prctl.h +header-y += processor-flags.h  header-y += ptrace-abi.h  header-y += sigcontext32.h  header-y += ucontext.h 
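An aside on the sys32_fanotify_mark() wrapper added above: the ia32 ABI cannot pass a 64-bit argument in a single register, so the mask arrives from 32-bit userspace as two u32 halves that the wrapper splices back together with ((u64)mask_hi << 32) | mask_lo. The same hi/lo pattern appears in sys32_pwrite() and sys32_fallocate() above. A minimal standalone sketch of that convention (split_mask/join_mask are illustrative names, not kernel helpers):

#include <stdint.h>
#include <stdio.h>

static void split_mask(uint64_t mask, uint32_t *lo, uint32_t *hi)
{
	*lo = (uint32_t)mask;		/* low 32 bits */
	*hi = (uint32_t)(mask >> 32);	/* high 32 bits */
}

static uint64_t join_mask(uint32_t lo, uint32_t hi)
{
	/* same expression as the wrapper: ((u64)mask_hi << 32) | mask_lo */
	return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
	uint64_t mask = 0x123456789abcdef0ULL;
	uint32_t lo, hi;

	split_mask(mask, &lo, &hi);
	printf("round-trip ok: %d\n", join_mask(lo, hi) == mask);
	return 0;
}
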
-header-y += processor-flags.h -header-y += hw_breakpoint.h -header-y += hyperv.h - -unifdef-y += e820.h -unifdef-y += ist.h -unifdef-y += mce.h -unifdef-y += msr.h -unifdef-y += mtrr.h -unifdef-y += posix_types_32.h -unifdef-y += posix_types_64.h -unifdef-y += unistd_32.h -unifdef-y += unistd_64.h -unifdef-y += vm86.h -unifdef-y += vsyscall.h +header-y += unistd_32.h +header-y += unistd_64.h +header-y += vm86.h +header-y += vsyscall.h diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index aa2c39d968fc..92091de11113 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -134,7 +134,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate)  	    boot_cpu_data.x86_model <= 0x05 &&  	    boot_cpu_data.x86_mask < 0x0A)  		return 1; -	else if (boot_cpu_has(X86_FEATURE_AMDC1E)) +	else if (c1e_detected)  		return 1;  	else  		return max_cstate; diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 03b6bb5394a0..bc6abb7bc7ee 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -45,10 +45,9 @@  struct alt_instr {  	u8 *instr;		/* original instruction */  	u8 *replacement; -	u8  cpuid;		/* cpuid bit set for replacement */ +	u16 cpuid;		/* cpuid bit set for replacement */  	u8  instrlen;		/* length of original instruction */  	u8  replacementlen;	/* length of new instruction, <= instrlen */ -	u8  pad1;  #ifdef CONFIG_X86_64  	u32 pad2;  #endif @@ -86,9 +85,11 @@ static inline int alternatives_text_reserved(void *start, void *end)        _ASM_ALIGN "\n"							\        _ASM_PTR "661b\n"				/* label           */	\        _ASM_PTR "663f\n"				/* new instruction */	\ -      "	 .byte " __stringify(feature) "\n"	/* feature bit     */	\ +      "	 .word " __stringify(feature) "\n"	/* feature bit     */	\        "	 .byte 662b-661b\n"			/* sourcelen       */	\        "	 .byte 664f-663f\n"			/* replacementlen  */	\ +      ".previous\n"							\ +      ".section .discard,\"aw\",@progbits\n"				\        "	 .byte 0xff + (664f-663f) - (662b-661b)\n" /* rlen <= slen */	\        ".previous\n"							\        ".section .altinstr_replacement, \"ax\"\n"			\ diff --git a/arch/x86/include/asm/apb_timer.h b/arch/x86/include/asm/apb_timer.h index c74a2eebe570..a69b1ac9eaf8 100644 --- a/arch/x86/include/asm/apb_timer.h +++ b/arch/x86/include/asm/apb_timer.h @@ -55,7 +55,6 @@ extern unsigned long apbt_quick_calibrate(void);  extern int arch_setup_apbt_irqs(int irq, int trigger, int mask, int cpu);  extern void apbt_setup_secondary_clock(void);  extern unsigned int boot_cpu_id; -extern int disable_apbt_percpu;  extern struct sfi_timer_table_entry *sfi_get_mtmr(int hint);  extern void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr); diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h index 6be33d83c716..8e6218550e77 100644 --- a/arch/x86/include/asm/bootparam.h +++ b/arch/x86/include/asm/bootparam.h @@ -70,6 +70,14 @@ struct sys_desc_table {  	__u8  table[14];  }; +/* Gleaned from OFW's set-parameters in cpu/x86/pc/linux.fth */ +struct olpc_ofw_header { +	__u32 ofw_magic;	/* OFW signature */ +	__u32 ofw_version; +	__u32 cif_handler;	/* callback into OFW */ +	__u32 irq_desc_table; +} __attribute__((packed)); +  struct efi_info {  	__u32 efi_loader_signature;  	__u32 efi_systab; @@ -92,7 +100,8 @@ struct boot_params {  	__u8  hd0_info[16];	/* obsolete! */		/* 0x080 */  	__u8  hd1_info[16];	/* obsolete! 
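The alt_instr change above, widening the cpuid field from u8 to u16 (and emitting it with .word instead of .byte), is needed because alternatives identify a CPU feature by its index 32*word + bit; with the cpufeature words in this diff (NCAPINTS grows from 9 to 10 below), valid indices exceed 255 and no longer fit in a byte. A standalone illustration of the arithmetic:

#include <assert.h>
#include <stdio.h>

#define FEATURE(word, bit)	((word) * 32 + (bit))

int main(void)
{
	/* word 8, bit 8 is X86_FEATURE_NRIPS in the cpufeature.h hunk below */
	int nrips    = FEATURE(8, 8);	/* 264: already > 255 */
	/* word 9, bit 0 is X86_FEATURE_FSGSBASE in the same hunk */
	int fsgsbase = FEATURE(9, 0);	/* 288 */

	printf("%d %d\n", nrips, fsgsbase);
	assert(nrips > 255 && fsgsbase > 255);	/* a u8 field would truncate these */
	return 0;
}
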
*/		/* 0x090 */  	struct sys_desc_table sys_desc_table;		/* 0x0a0 */ -	__u8  _pad4[144];				/* 0x0b0 */ +	struct olpc_ofw_header olpc_ofw_header;		/* 0x0b0 */ +	__u8  _pad4[128];				/* 0x0c0 */  	struct edid_info edid_info;			/* 0x140 */  	struct efi_info efi_info;			/* 0x1c0 */  	__u32 alt_mem_k;				/* 0x1e0 */ diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index 8859e12dd3cf..284a6e8f7ce1 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -11,38 +11,42 @@  extern void __xchg_wrong_size(void);  /* - * Note: no "lock" prefix even on SMP: xchg always implies lock anyway - * Note 2: xchg has side effect, so that attribute volatile is necessary, - *	  but generally the primitive is invalid, *ptr is output argument. --ANK + * Note: no "lock" prefix even on SMP: xchg always implies lock anyway. + * Since this is generally used to protect other memory information, we + * use "asm volatile" and "memory" clobbers to prevent gcc from moving + * information around.   */ - -struct __xchg_dummy { -	unsigned long a[100]; -}; -#define __xg(x) ((struct __xchg_dummy *)(x)) -  #define __xchg(x, ptr, size)						\  ({									\  	__typeof(*(ptr)) __x = (x);					\  	switch (size) {							\  	case 1:								\ -		asm volatile("xchgb %b0,%1"				\ -			     : "=q" (__x)				\ -			     : "m" (*__xg(ptr)), "0" (__x)		\ +	{								\ +		volatile u8 *__ptr = (volatile u8 *)(ptr);		\ +		asm volatile("xchgb %0,%1"				\ +			     : "=q" (__x), "+m" (*__ptr)		\ +			     : "0" (__x)				\  			     : "memory");				\  		break;							\ +	}								\  	case 2:								\ -		asm volatile("xchgw %w0,%1"				\ -			     : "=r" (__x)				\ -			     : "m" (*__xg(ptr)), "0" (__x)		\ +	{								\ +		volatile u16 *__ptr = (volatile u16 *)(ptr);		\ +		asm volatile("xchgw %0,%1"				\ +			     : "=r" (__x), "+m" (*__ptr)		\ +			     : "0" (__x)				\  			     : "memory");				\  		break;							\ +	}								\  	case 4:								\ +	{								\ +		volatile u32 *__ptr = (volatile u32 *)(ptr);		\  		asm volatile("xchgl %0,%1"				\ -			     : "=r" (__x)				\ -			     : "m" (*__xg(ptr)), "0" (__x)		\ +			     : "=r" (__x), "+m" (*__ptr)		\ +			     : "0" (__x)				\  			     : "memory");				\  		break;							\ +	}								\  	default:							\  		__xchg_wrong_size();					\  	}								\ @@ -53,60 +57,33 @@ struct __xchg_dummy {  	__xchg((v), (ptr), sizeof(*ptr))  /* - * The semantics of XCHGCMP8B are a bit strange, this is why - * there is a loop and the loading of %%eax and %%edx has to - * be inside. This inlines well in most cases, the cached - * cost is around ~38 cycles. (in the future we might want - * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that - * might have an implicit FPU-save as a cost, so it's not - * clear which path to go.) + * CMPXCHG8B only writes to the target if we had the previous + * value in registers, otherwise it acts as a read and gives us the + * "new previous" value.  That is why there is a loop.  Preloading + * EDX:EAX is a performance optimization: in the common case it means + * we need only one locked operation.   * - * cmpxchg8b must be used with the lock prefix here to allow - * the instruction to be executed atomically, see page 3-102 - * of the instruction set reference 24319102.pdf. We need - * the reader side to see the coherent 64bit value. + * A SIMD/3DNOW!/MMX/FPU 64-bit store here would require at the very + * least an FPU save and/or %cr0.ts manipulation. 
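The loop described in the comment above has a portable analogue. The following C11 sketch mirrors the shape of the new set_64bit() that follows, with the preload and the "new previous" retry made explicit; it is a sketch only, since the kernel must use cmpxchg8b directly because a plain 64-bit store is not atomic on 32-bit x86:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Portable sketch of the set_64bit() shape below. */
static void set_64bit_sketch(_Atomic uint64_t *ptr, uint64_t value)
{
	/* Preload the expected value: in the uncontended case the first
	 * compare-exchange succeeds, so only one locked operation is
	 * issued -- the optimization the comment above describes. */
	uint64_t prev = atomic_load_explicit(ptr, memory_order_relaxed);

	/* On failure, compare-exchange refreshes 'prev' with the value
	 * actually found in memory (the "new previous"), so the loop
	 * needs no explicit re-read. */
	while (!atomic_compare_exchange_weak(ptr, &prev, value))
		;
}

int main(void)
{
	_Atomic uint64_t v = 1;

	set_64bit_sketch(&v, 0x123456789ULL);
	printf("%llx\n", (unsigned long long)atomic_load(&v));
	return 0;
}
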
+ * + * cmpxchg8b must be used with the lock prefix here to allow the + * instruction to be executed atomically.  We need to have the reader + * side to see the coherent 64bit value.   */ -static inline void __set_64bit(unsigned long long *ptr, -			       unsigned int low, unsigned int high) +static inline void set_64bit(volatile u64 *ptr, u64 value)  { +	u32 low  = value; +	u32 high = value >> 32; +	u64 prev = *ptr; +  	asm volatile("\n1:\t" -		     "movl (%0), %%eax\n\t" -		     "movl 4(%0), %%edx\n\t" -		     LOCK_PREFIX "cmpxchg8b (%0)\n\t" +		     LOCK_PREFIX "cmpxchg8b %0\n\t"  		     "jnz 1b" -		     : /* no outputs */ -		     : "D"(ptr), -		       "b"(low), -		       "c"(high) -		     : "ax", "dx", "memory"); -} - -static inline void __set_64bit_constant(unsigned long long *ptr, -					unsigned long long value) -{ -	__set_64bit(ptr, (unsigned int)value, (unsigned int)(value >> 32)); -} - -#define ll_low(x)	*(((unsigned int *)&(x)) + 0) -#define ll_high(x)	*(((unsigned int *)&(x)) + 1) - -static inline void __set_64bit_var(unsigned long long *ptr, -				   unsigned long long value) -{ -	__set_64bit(ptr, ll_low(value), ll_high(value)); +		     : "=m" (*ptr), "+A" (prev) +		     : "b" (low), "c" (high) +		     : "memory");  } -#define set_64bit(ptr, value)			\ -	(__builtin_constant_p((value))		\ -	 ? __set_64bit_constant((ptr), (value))	\ -	 : __set_64bit_var((ptr), (value))) - -#define _set_64bit(ptr, value)						\ -	(__builtin_constant_p(value)					\ -	 ? __set_64bit(ptr, (unsigned int)(value),			\ -		       (unsigned int)((value) >> 32))			\ -	 : __set_64bit(ptr, ll_low((value)), ll_high((value)))) -  extern void __cmpxchg_wrong_size(void);  /* @@ -121,23 +98,32 @@ extern void __cmpxchg_wrong_size(void);  	__typeof__(*(ptr)) __new = (new);				\  	switch (size) {							\  	case 1:								\ -		asm volatile(lock "cmpxchgb %b1,%2"			\ -			     : "=a"(__ret)				\ -			     : "q"(__new), "m"(*__xg(ptr)), "0"(__old)	\ +	{								\ +		volatile u8 *__ptr = (volatile u8 *)(ptr);		\ +		asm volatile(lock "cmpxchgb %2,%1"			\ +			     : "=a" (__ret), "+m" (*__ptr)		\ +			     : "q" (__new), "0" (__old)			\  			     : "memory");				\  		break;							\ +	}								\  	case 2:								\ -		asm volatile(lock "cmpxchgw %w1,%2"			\ -			     : "=a"(__ret)				\ -			     : "r"(__new), "m"(*__xg(ptr)), "0"(__old)	\ +	{								\ +		volatile u16 *__ptr = (volatile u16 *)(ptr);		\ +		asm volatile(lock "cmpxchgw %2,%1"			\ +			     : "=a" (__ret), "+m" (*__ptr)		\ +			     : "r" (__new), "0" (__old)			\  			     : "memory");				\  		break;							\ +	}								\  	case 4:								\ -		asm volatile(lock "cmpxchgl %1,%2"			\ -			     : "=a"(__ret)				\ -			     : "r"(__new), "m"(*__xg(ptr)), "0"(__old)	\ +	{								\ +		volatile u32 *__ptr = (volatile u32 *)(ptr);		\ +		asm volatile(lock "cmpxchgl %2,%1"			\ +			     : "=a" (__ret), "+m" (*__ptr)		\ +			     : "r" (__new), "0" (__old)			\  			     : "memory");				\  		break;							\ +	}								\  	default:							\  		__cmpxchg_wrong_size();					\  	}								\ @@ -175,32 +161,28 @@ extern void __cmpxchg_wrong_size(void);  					       (unsigned long long)(n)))  #endif -static inline unsigned long long __cmpxchg64(volatile void *ptr, -					     unsigned long long old, -					     unsigned long long new) +static inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new)  { -	unsigned long long prev; -	asm volatile(LOCK_PREFIX "cmpxchg8b %3" -		     : "=A"(prev) -		     : "b"((unsigned long)new), -		       "c"((unsigned long)(new >> 32)), -		       "m"(*__xg(ptr)), -		     
  "0"(old) +	u64 prev; +	asm volatile(LOCK_PREFIX "cmpxchg8b %1" +		     : "=A" (prev), +		       "+m" (*ptr) +		     : "b" ((u32)new), +		       "c" ((u32)(new >> 32)), +		       "0" (old)  		     : "memory");  	return prev;  } -static inline unsigned long long __cmpxchg64_local(volatile void *ptr, -						   unsigned long long old, -						   unsigned long long new) +static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)  { -	unsigned long long prev; -	asm volatile("cmpxchg8b %3" -		     : "=A"(prev) -		     : "b"((unsigned long)new), -		       "c"((unsigned long)(new >> 32)), -		       "m"(*__xg(ptr)), -		       "0"(old) +	u64 prev; +	asm volatile("cmpxchg8b %1" +		     : "=A" (prev), +		       "+m" (*ptr) +		     : "b" ((u32)new), +		       "c" ((u32)(new >> 32)), +		       "0" (old)  		     : "memory");  	return prev;  } @@ -264,8 +246,6 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,   * to simulate the cmpxchg8b on the 80386 and 80486 CPU.   */ -extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64); -  #define cmpxchg64(ptr, o, n)					\  ({								\  	__typeof__(*(ptr)) __ret;				\ @@ -283,20 +263,20 @@ extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64);  	__ret; }) - -#define cmpxchg64_local(ptr, o, n)					\ -({									\ -	__typeof__(*(ptr)) __ret;					\ -	if (likely(boot_cpu_data.x86 > 4))				\ -		__ret = (__typeof__(*(ptr)))__cmpxchg64_local((ptr),	\ -				(unsigned long long)(o),		\ -				(unsigned long long)(n));		\ -	else								\ -		__ret = (__typeof__(*(ptr)))cmpxchg_486_u64((ptr),	\ -				(unsigned long long)(o),		\ -				(unsigned long long)(n));		\ -	__ret;								\ -}) +#define cmpxchg64_local(ptr, o, n)				\ +({								\ +	__typeof__(*(ptr)) __ret;				\ +	__typeof__(*(ptr)) __old = (o);				\ +	__typeof__(*(ptr)) __new = (n);				\ +	alternative_io("call cmpxchg8b_emu",			\ +		       "cmpxchg8b (%%esi)" ,			\ +		       X86_FEATURE_CX8,				\ +		       "=A" (__ret),				\ +		       "S" ((ptr)), "0" (__old),		\ +		       "b" ((unsigned int)__new),		\ +		       "c" ((unsigned int)(__new>>32))		\ +		       : "memory");				\ +	__ret; })  #endif diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 485ae415faec..423ae58aa020 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -3,51 +3,60 @@  #include <asm/alternative.h> /* Provides LOCK_PREFIX */ -#define __xg(x) ((volatile long *)(x)) - -static inline void set_64bit(volatile unsigned long *ptr, unsigned long val) +static inline void set_64bit(volatile u64 *ptr, u64 val)  {  	*ptr = val;  } -#define _set_64bit set_64bit -  extern void __xchg_wrong_size(void);  extern void __cmpxchg_wrong_size(void);  /* - * Note: no "lock" prefix even on SMP: xchg always implies lock anyway - * Note 2: xchg has side effect, so that attribute volatile is necessary, - *	  but generally the primitive is invalid, *ptr is output argument. --ANK + * Note: no "lock" prefix even on SMP: xchg always implies lock anyway. + * Since this is generally used to protect other memory information, we + * use "asm volatile" and "memory" clobbers to prevent gcc from moving + * information around.   
*/  #define __xchg(x, ptr, size)						\  ({									\  	__typeof(*(ptr)) __x = (x);					\  	switch (size) {							\  	case 1:								\ -		asm volatile("xchgb %b0,%1"				\ -			     : "=q" (__x)				\ -			     : "m" (*__xg(ptr)), "0" (__x)		\ +	{								\ +		volatile u8 *__ptr = (volatile u8 *)(ptr);		\ +		asm volatile("xchgb %0,%1"				\ +			     : "=q" (__x), "+m" (*__ptr)		\ +			     : "0" (__x)				\  			     : "memory");				\  		break;							\ +	}								\  	case 2:								\ -		asm volatile("xchgw %w0,%1"				\ -			     : "=r" (__x)				\ -			     : "m" (*__xg(ptr)), "0" (__x)		\ +	{								\ +		volatile u16 *__ptr = (volatile u16 *)(ptr);		\ +		asm volatile("xchgw %0,%1"				\ +			     : "=r" (__x), "+m" (*__ptr)		\ +			     : "0" (__x)				\  			     : "memory");				\  		break;							\ +	}								\  	case 4:								\ -		asm volatile("xchgl %k0,%1"				\ -			     : "=r" (__x)				\ -			     : "m" (*__xg(ptr)), "0" (__x)		\ +	{								\ +		volatile u32 *__ptr = (volatile u32 *)(ptr);		\ +		asm volatile("xchgl %0,%1"				\ +			     : "=r" (__x), "+m" (*__ptr)		\ +			     : "0" (__x)				\  			     : "memory");				\  		break;							\ +	}								\  	case 8:								\ +	{								\ +		volatile u64 *__ptr = (volatile u64 *)(ptr);		\  		asm volatile("xchgq %0,%1"				\ -			     : "=r" (__x)				\ -			     : "m" (*__xg(ptr)), "0" (__x)		\ +			     : "=r" (__x), "+m" (*__ptr)		\ +			     : "0" (__x)				\  			     : "memory");				\  		break;							\ +	}								\  	default:							\  		__xchg_wrong_size();					\  	}								\ @@ -71,29 +80,41 @@ extern void __cmpxchg_wrong_size(void);  	__typeof__(*(ptr)) __new = (new);				\  	switch (size) {							\  	case 1:								\ -		asm volatile(lock "cmpxchgb %b1,%2"			\ -			     : "=a"(__ret)				\ -			     : "q"(__new), "m"(*__xg(ptr)), "0"(__old)	\ +	{								\ +		volatile u8 *__ptr = (volatile u8 *)(ptr);		\ +		asm volatile(lock "cmpxchgb %2,%1"			\ +			     : "=a" (__ret), "+m" (*__ptr)		\ +			     : "q" (__new), "0" (__old)			\  			     : "memory");				\  		break;							\ +	}								\  	case 2:								\ -		asm volatile(lock "cmpxchgw %w1,%2"			\ -			     : "=a"(__ret)				\ -			     : "r"(__new), "m"(*__xg(ptr)), "0"(__old)	\ +	{								\ +		volatile u16 *__ptr = (volatile u16 *)(ptr);		\ +		asm volatile(lock "cmpxchgw %2,%1"			\ +			     : "=a" (__ret), "+m" (*__ptr)		\ +			     : "r" (__new), "0" (__old)			\  			     : "memory");				\  		break;							\ +	}								\  	case 4:								\ -		asm volatile(lock "cmpxchgl %k1,%2"			\ -			     : "=a"(__ret)				\ -			     : "r"(__new), "m"(*__xg(ptr)), "0"(__old)	\ +	{								\ +		volatile u32 *__ptr = (volatile u32 *)(ptr);		\ +		asm volatile(lock "cmpxchgl %2,%1"			\ +			     : "=a" (__ret), "+m" (*__ptr)		\ +			     : "r" (__new), "0" (__old)			\  			     : "memory");				\  		break;							\ +	}								\  	case 8:								\ -		asm volatile(lock "cmpxchgq %1,%2"			\ -			     : "=a"(__ret)				\ -			     : "r"(__new), "m"(*__xg(ptr)), "0"(__old)	\ +	{								\ +		volatile u64 *__ptr = (volatile u64 *)(ptr);		\ +		asm volatile(lock "cmpxchgq %2,%1"			\ +			     : "=a" (__ret), "+m" (*__ptr)		\ +			     : "r" (__new), "0" (__old)			\  			     : "memory");				\  		break;							\ +	}								\  	default:							\  		__cmpxchg_wrong_size();					\  	}								\ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 468145914389..781a50b29a49 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -6,7 +6,7 @@  #include <asm/required-features.h> -#define NCAPINTS	9	/* N 
32-bit words worth of info */ +#define NCAPINTS	10	/* N 32-bit words worth of info */  /*   * Note: If the comment begins with a quoted string, that string is used @@ -89,7 +89,7 @@  #define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* "" Lfence synchronizes RDTSC */  #define X86_FEATURE_11AP	(3*32+19) /* "" Bad local APIC aka 11AP */  #define X86_FEATURE_NOPL	(3*32+20) /* The NOPL (0F 1F) instructions */ -#define X86_FEATURE_AMDC1E	(3*32+21) /* AMD C1E detected */ +					  /* 21 available, was AMD_C1E */  #define X86_FEATURE_XTOPOLOGY	(3*32+22) /* cpu topology enum extensions */  #define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */  #define X86_FEATURE_NONSTOP_TSC	(3*32+24) /* TSC does not stop in C states */ @@ -124,6 +124,8 @@  #define X86_FEATURE_XSAVE	(4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */  #define X86_FEATURE_OSXSAVE	(4*32+27) /* "" XSAVE enabled in the OS */  #define X86_FEATURE_AVX		(4*32+28) /* Advanced Vector Extensions */ +#define X86_FEATURE_F16C	(4*32+29) /* 16-bit fp conversions */ +#define X86_FEATURE_RDRND	(4*32+30) /* The RDRAND instruction */  #define X86_FEATURE_HYPERVISOR	(4*32+31) /* Running on a hypervisor */  /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ @@ -157,22 +159,29 @@  /*   * Auxiliary flags: Linux defined - For features scattered in various - * CPUID levels like 0x6, 0xA etc + * CPUID levels like 0x6, 0xA etc, word 7   */  #define X86_FEATURE_IDA		(7*32+ 0) /* Intel Dynamic Acceleration */  #define X86_FEATURE_ARAT	(7*32+ 1) /* Always Running APIC Timer */  #define X86_FEATURE_CPB		(7*32+ 2) /* AMD Core Performance Boost */ +#define X86_FEATURE_EPB		(7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ +#define X86_FEATURE_XSAVEOPT	(7*32+ 4) /* Optimized Xsave */ +#define X86_FEATURE_PLN		(7*32+ 5) /* Intel Power Limit Notification */ +#define X86_FEATURE_PTS		(7*32+ 6) /* Intel Package Thermal Status */ -/* Virtualization flags: Linux defined */ +/* Virtualization flags: Linux defined, word 8 */  #define X86_FEATURE_TPR_SHADOW  (8*32+ 0) /* Intel TPR Shadow */  #define X86_FEATURE_VNMI        (8*32+ 1) /* Intel Virtual NMI */  #define X86_FEATURE_FLEXPRIORITY (8*32+ 2) /* Intel FlexPriority */  #define X86_FEATURE_EPT         (8*32+ 3) /* Intel Extended Page Table */  #define X86_FEATURE_VPID        (8*32+ 4) /* Intel Virtual Processor ID */ -#define X86_FEATURE_NPT		(8*32+5)  /* AMD Nested Page Table support */ -#define X86_FEATURE_LBRV	(8*32+6)  /* AMD LBR Virtualization support */ -#define X86_FEATURE_SVML	(8*32+7)  /* "svm_lock" AMD SVM locking MSR */ -#define X86_FEATURE_NRIPS	(8*32+8)  /* "nrip_save" AMD SVM next_rip save */ +#define X86_FEATURE_NPT		(8*32+ 5) /* AMD Nested Page Table support */ +#define X86_FEATURE_LBRV	(8*32+ 6) /* AMD LBR Virtualization support */ +#define X86_FEATURE_SVML	(8*32+ 7) /* "svm_lock" AMD SVM locking MSR */ +#define X86_FEATURE_NRIPS	(8*32+ 8) /* "nrip_save" AMD SVM next_rip save */ + +/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ +#define X86_FEATURE_FSGSBASE	(9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/  #if defined(__KERNEL__) && !defined(__ASSEMBLY__) @@ -194,7 +203,9 @@ extern const char * const x86_power_flags[32];  	   (((bit)>>5)==4 && (1UL<<((bit)&31) & REQUIRED_MASK4)) ||	\  	   (((bit)>>5)==5 && (1UL<<((bit)&31) & REQUIRED_MASK5)) ||	\  	   (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) ||	\ -	   (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) )	\ +	   (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) ||	\ +	   
(((bit)>>5)==8 && (1UL<<((bit)&31) & REQUIRED_MASK8)) ||	\ +	   (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) )	\  	  ? 1 :								\  	 test_cpu_cap(c, bit)) @@ -291,7 +302,7 @@ extern const char * const x86_power_flags[32];   * patch the target code for additional performance.   *   */ -static __always_inline __pure bool __static_cpu_has(u8 bit) +static __always_inline __pure bool __static_cpu_has(u16 bit)  {  #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)  		asm goto("1: jmp %l[t_no]\n" @@ -300,11 +311,11 @@ static __always_inline __pure bool __static_cpu_has(u8 bit)  			 _ASM_ALIGN "\n"  			 _ASM_PTR "1b\n"  			 _ASM_PTR "0\n" 	/* no replacement */ -			 " .byte %P0\n"		/* feature bit */ +			 " .word %P0\n"		/* feature bit */  			 " .byte 2b - 1b\n"	/* source len */  			 " .byte 0\n"		/* replacement len */ -			 " .byte 0xff + 0 - (2b-1b)\n"	/* padding */  			 ".previous\n" +			 /* skipping size check since replacement size = 0 */  			 : : "i" (bit) : : t_no);  		return true;  	t_no: @@ -318,10 +329,12 @@ static __always_inline __pure bool __static_cpu_has(u8 bit)  			     _ASM_ALIGN "\n"  			     _ASM_PTR "1b\n"  			     _ASM_PTR "3f\n" -			     " .byte %P1\n"		/* feature bit */ +			     " .word %P1\n"		/* feature bit */  			     " .byte 2b - 1b\n"		/* source len */  			     " .byte 4f - 3f\n"		/* replacement len */ -			     " .byte 0xff + (4f-3f) - (2b-1b)\n" /* padding */ +			     ".previous\n" +			     ".section .discard,\"aw\",@progbits\n" +			     " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */  			     ".previous\n"  			     ".section .altinstr_replacement,\"ax\"\n"  			     "3: movb $1,%0\n" @@ -337,7 +350,7 @@ static __always_inline __pure bool __static_cpu_has(u8 bit)  (								\  	__builtin_constant_p(boot_cpu_has(bit)) ?		\  		boot_cpu_has(bit) :				\ -	(__builtin_constant_p(bit) && !((bit) & ~0xff)) ?	\ +	__builtin_constant_p(bit) ?				
\  		__static_cpu_has(bit) :				\  		boot_cpu_has(bit)				\  ) diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index ac91eed21061..d4c419f883a0 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -54,7 +54,6 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)  #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)  #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) -#define dma_is_consistent(d, h)	(1)  extern int dma_supported(struct device *hwdev, u64 mask);  extern int dma_set_mask(struct device *dev, u64 mask); @@ -87,13 +86,6 @@ dma_cache_sync(struct device *dev, void *vaddr, size_t size,  	flush_write_buffers();  } -static inline int dma_get_cache_alignment(void) -{ -	/* no easy way to get cache size on all x86, so return the -	 * maximum possible, to be safe */ -	return boot_cpu_data.x86_clflush_size; -} -  static inline unsigned long dma_alloc_coherent_mask(struct device *dev,  						    gfp_t gfp)  { diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h index a726650fc80f..8caac76ac324 100644 --- a/arch/x86/include/asm/highmem.h +++ b/arch/x86/include/asm/highmem.h @@ -61,7 +61,7 @@ void *kmap(struct page *page);  void kunmap(struct page *page);  void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot);  void *kmap_atomic(struct page *page, enum km_type type); -void kunmap_atomic(void *kvaddr, enum km_type type); +void kunmap_atomic_notypecheck(void *kvaddr, enum km_type type);  void *kmap_atomic_pfn(unsigned long pfn, enum km_type type);  void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);  struct page *kmap_atomic_to_page(void *ptr); diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 942255310e6a..528a11e8d3e3 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -20,10 +20,10 @@ struct arch_hw_breakpoint {  #include <linux/list.h>  /* Available HW breakpoint length encodings */ +#define X86_BREAKPOINT_LEN_X		0x00  #define X86_BREAKPOINT_LEN_1		0x40  #define X86_BREAKPOINT_LEN_2		0x44  #define X86_BREAKPOINT_LEN_4		0x4c -#define X86_BREAKPOINT_LEN_EXECUTE	0x40  #ifdef CONFIG_X86_64  #define X86_BREAKPOINT_LEN_8		0x48 diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index 70abda7058c8..ff2546ce7178 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h @@ -45,5 +45,6 @@ extern const struct hypervisor_x86 *x86_hyper;  /* Recognized hypervisors */  extern const struct hypervisor_x86 x86_hyper_vmware;  extern const struct hypervisor_x86 x86_hyper_ms_hyperv; +extern const struct hypervisor_x86 x86_hyper_xen_hvm;  #endif diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index c991b3a7b904..a73a8d5a5e69 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -31,7 +31,6 @@ extern void mxcsr_feature_mask_init(void);  extern int init_fpu(struct task_struct *child);  extern asmlinkage void math_state_restore(void);  extern void __math_state_restore(void); -extern void init_thread_xstate(void);  extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);  extern user_regset_active_fn fpregs_active, xfpregs_active; @@ -58,11 +57,25 @@ extern int restore_i387_xstate_ia32(void __user *buf);  #define X87_FSW_ES (1 << 7)	/* Exception Summary */ +static __always_inline __pure bool 
use_xsaveopt(void) +{ +	return static_cpu_has(X86_FEATURE_XSAVEOPT); +} +  static __always_inline __pure bool use_xsave(void)  {  	return static_cpu_has(X86_FEATURE_XSAVE);  } +extern void __sanitize_i387_state(struct task_struct *); + +static inline void sanitize_i387_state(struct task_struct *tsk) +{ +	if (!use_xsaveopt()) +		return; +	__sanitize_i387_state(tsk); +} +  #ifdef CONFIG_X86_64  /* Ignore delayed exceptions from user space */ @@ -127,6 +140,15 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)  {  	int err; +	/* +	 * Clear the bytes not touched by the fxsave and reserved +	 * for the SW usage. +	 */ +	err = __clear_user(&fx->sw_reserved, +			   sizeof(struct _fpx_sw_bytes)); +	if (unlikely(err)) +		return -EFAULT; +  	asm volatile("1:  rex64/fxsave (%[fx])\n\t"  		     "2:\n"  		     ".section .fixup,\"ax\"\n" @@ -482,6 +504,8 @@ static inline void fpu_copy(struct fpu *dst, struct fpu *src)  	memcpy(dst->state, src->state, xstate_size);  } +extern void fpu_finit(struct fpu *fpu); +  #endif /* __ASSEMBLY__ */  #define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 diff --git a/arch/x86/include/asm/intel_scu_ipc.h b/arch/x86/include/asm/intel_scu_ipc.h index 4470c9ad4a3e..29f66793cc55 100644 --- a/arch/x86/include/asm/intel_scu_ipc.h +++ b/arch/x86/include/asm/intel_scu_ipc.h @@ -1,6 +1,12 @@  #ifndef _ASM_X86_INTEL_SCU_IPC_H_  #define  _ASM_X86_INTEL_SCU_IPC_H_ +#define IPCMSG_VRTC	0xFA	 /* Set vRTC device */ + +/* Command id associated with message IPCMSG_VRTC */ +#define IPC_CMD_VRTC_SETTIME      1 /* Set time */ +#define IPC_CMD_VRTC_SETALARM     2 /* Set alarm */ +  /* Read single register */  int intel_scu_ipc_ioread8(u16 addr, u8 *data); @@ -28,20 +34,6 @@ int intel_scu_ipc_writev(u16 *addr, u8 *data, int len);  /* Update single register based on the mask */  int intel_scu_ipc_update_register(u16 addr, u8 data, u8 mask); -/* - * Indirect register read - * Can be used when SCCB(System Controller Configuration Block) register - * HRIM(Honor Restricted IPC Messages) is set (bit 23) - */ -int intel_scu_ipc_register_read(u32 addr, u32 *data); - -/* - * Indirect register write - * Can be used when SCCB(System Controller Configuration Block) register - * HRIM(Honor Restricted IPC Messages) is set (bit 23) - */ -int intel_scu_ipc_register_write(u32 addr, u32 data); -  /* Issue commands to the SCU with or without data */  int intel_scu_ipc_simple_command(int cmd, int sub);  int intel_scu_ipc_command(int cmd, int sub, u32 *in, int inlen, diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 8767d99c4f64..e2ca30092557 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -125,6 +125,9 @@   */  #define MCE_SELF_VECTOR			0xeb +/* Xen vector callback to receive events in a HVM domain */ +#define XEN_HVM_EVTCHN_CALLBACK		0xe9 +  #define NR_VECTORS			 256  #define FPU_IRQ				  13 diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index fa7c0b974761..5bdfca86581b 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -33,5 +33,11 @@ extern void __show_regs(struct pt_regs *regs, int all);  extern void show_regs(struct pt_regs *regs);  extern unsigned long oops_begin(void);  extern void oops_end(unsigned long, struct pt_regs *, int signr); +#ifdef CONFIG_KEXEC +extern int in_crash_kexec; +#else +/* no crash dump is ever in progress if no crash kernel can be kexec'd */ +#define in_crash_kexec 0 +#endif  #endif /* _ASM_X86_KDEBUG_H */ diff 
--git a/arch/x86/include/asm/kgdb.h b/arch/x86/include/asm/kgdb.h index 006da3687cdc..396f5b5fc4d7 100644 --- a/arch/x86/include/asm/kgdb.h +++ b/arch/x86/include/asm/kgdb.h @@ -39,9 +39,11 @@ enum regnames {  	GDB_FS,			/* 14 */  	GDB_GS,			/* 15 */  }; +#define GDB_ORIG_AX		41 +#define DBG_MAX_REG_NUM		16  #define NUMREGBYTES		((GDB_GS+1)*4)  #else /* ! CONFIG_X86_32 */ -enum regnames64 { +enum regnames {  	GDB_AX,			/* 0 */  	GDB_BX,			/* 1 */  	GDB_CX,			/* 2 */ @@ -59,15 +61,15 @@ enum regnames64 {  	GDB_R14,		/* 14 */  	GDB_R15,		/* 15 */  	GDB_PC,			/* 16 */ +	GDB_PS,			/* 17 */ +	GDB_CS,			/* 18 */ +	GDB_SS,			/* 19 */  }; - -enum regnames32 { -	GDB_PS = 34, -	GDB_CS, -	GDB_SS, -}; -#define NUMREGBYTES		((GDB_SS+1)*4) -#endif /* CONFIG_X86_32 */ +#define GDB_ORIG_AX		57 +#define DBG_MAX_REG_NUM		20 +/* 17 64 bit regs and 3 32 bit regs */ +#define NUMREGBYTES		((17 * 8) + (3 * 4)) +#endif /* ! CONFIG_X86_32 */  static inline void arch_kgdb_breakpoint(void)  { diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index ff90055c7f0b..4d8dcbdfc120 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -22,6 +22,8 @@  #define __KVM_HAVE_XEN_HVM  #define __KVM_HAVE_VCPU_EVENTS  #define __KVM_HAVE_DEBUGREGS +#define __KVM_HAVE_XSAVE +#define __KVM_HAVE_XCRS  /* Architectural interrupt line count. */  #define KVM_NR_INTERRUPTS 256 @@ -299,4 +301,24 @@ struct kvm_debugregs {  	__u64 reserved[9];  }; +/* for KVM_CAP_XSAVE */ +struct kvm_xsave { +	__u32 region[1024]; +}; + +#define KVM_MAX_XCRS	16 + +struct kvm_xcr { +	__u32 xcr; +	__u32 reserved; +	__u64 value; +}; + +struct kvm_xcrs { +	__u32 nr_xcrs; +	__u32 flags; +	struct kvm_xcr xcrs[KVM_MAX_XCRS]; +	__u64 padding[16]; +}; +  #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 0b2729bf2070..51cfd730ac5d 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -51,8 +51,10 @@ struct x86_emulate_ctxt;  #define X86EMUL_UNHANDLEABLE    1  /* Terminate emulation but return success to the caller. */  #define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */ -#define X86EMUL_RETRY_INSTR     2 /* retry the instruction for some reason */ -#define X86EMUL_CMPXCHG_FAILED  2 /* cmpxchg did not see expected value */ +#define X86EMUL_RETRY_INSTR     3 /* retry the instruction for some reason */ +#define X86EMUL_CMPXCHG_FAILED  4 /* cmpxchg did not see expected value */ +#define X86EMUL_IO_NEEDED       5 /* IO is needed to complete emulation */ +  struct x86_emulate_ops {  	/*  	 * read_std: Read bytes of standard (non-emulated/special) memory. 
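The renumbering of the X86EMUL_* completion codes above is the substantive fix: X86EMUL_RETRY_INSTR and X86EMUL_CMPXCHG_FAILED previously aliased X86EMUL_PROPAGATE_FAULT (all were 2), so a caller could not tell a retryable outcome from a generated fault. A hedged sketch of the dispatch this makes possible (illustrative only; the emulator's real control flow lives elsewhere in arch/x86/kvm, and these helper names are hypothetical):

	/* illustrative only -- assumes the X86EMUL_* codes defined above */
	static bool emul_should_retry(int rc)
	{
		/* retry-style outcomes are now distinct values (3 and 4) */
		return rc == X86EMUL_RETRY_INSTR || rc == X86EMUL_CMPXCHG_FAILED;
	}

	static bool emul_needs_userspace_exit(int rc)
	{
		/* IO_NEEDED (5): leave emulation, complete the I/O, re-enter */
		return rc == X86EMUL_IO_NEEDED;
	}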
@@ -92,6 +94,7 @@ struct x86_emulate_ops {  	int (*read_emulated)(unsigned long addr,  			     void *val,  			     unsigned int bytes, +			     unsigned int *error,  			     struct kvm_vcpu *vcpu);  	/* @@ -104,6 +107,7 @@ struct x86_emulate_ops {  	int (*write_emulated)(unsigned long addr,  			      const void *val,  			      unsigned int bytes, +			      unsigned int *error,  			      struct kvm_vcpu *vcpu);  	/* @@ -118,6 +122,7 @@ struct x86_emulate_ops {  				const void *old,  				const void *new,  				unsigned int bytes, +				unsigned int *error,  				struct kvm_vcpu *vcpu);  	int (*pio_in_emulated)(int size, unsigned short port, void *val, @@ -132,18 +137,26 @@ struct x86_emulate_ops {  				      int seg, struct kvm_vcpu *vcpu);  	u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu);  	void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu); +	unsigned long (*get_cached_segment_base)(int seg, struct kvm_vcpu *vcpu);  	void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu);  	ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu); -	void (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); +	int (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu);  	int (*cpl)(struct kvm_vcpu *vcpu); -	void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); +	int (*get_dr)(int dr, unsigned long *dest, struct kvm_vcpu *vcpu); +	int (*set_dr)(int dr, unsigned long value, struct kvm_vcpu *vcpu); +	int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); +	int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);  };  /* Type, address-of, and value of an instruction's operand. */  struct operand {  	enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type;  	unsigned int bytes; -	unsigned long val, orig_val, *ptr; +	unsigned long orig_val, *ptr; +	union { +		unsigned long val; +		char valptr[sizeof(unsigned long) + 2]; +	};  };  struct fetch_cache { @@ -186,6 +199,7 @@ struct decode_cache {  	unsigned long modrm_val;  	struct fetch_cache fetch;  	struct read_cache io_read; +	struct read_cache mem_read;  };  struct x86_emulate_ctxt { @@ -202,6 +216,12 @@ struct x86_emulate_ctxt {  	int interruptibility;  	bool restart; /* restart string instruction after writeback */ + +	int exception; /* exception that happens during emulation or -1 */ +	u32 error_code; /* error code for exception */ +	bool error_code_valid; +	unsigned long cr2; /* faulted address in case of #PF */ +  	/* decode cache */  	struct decode_cache decode;  }; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 76f5483cffec..502e53f999cf 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -15,6 +15,7 @@  #include <linux/mm.h>  #include <linux/mmu_notifier.h>  #include <linux/tracepoint.h> +#include <linux/cpumask.h>  #include <linux/kvm.h>  #include <linux/kvm_para.h> @@ -39,11 +40,14 @@  				  0xFFFFFF0000000000ULL)  #define INVALID_PAGE (~(hpa_t)0) +#define VALID_PAGE(x) ((x) != INVALID_PAGE) +  #define UNMAPPED_GVA (~(gpa_t)0)  /* KVM Hugepage definitions for x86 */  #define KVM_NR_PAGE_SIZES	3 -#define KVM_HPAGE_SHIFT(x)	(PAGE_SHIFT + (((x) - 1) * 9)) +#define KVM_HPAGE_GFN_SHIFT(x)	(((x) - 1) * 9) +#define KVM_HPAGE_SHIFT(x)	(PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x))  #define KVM_HPAGE_SIZE(x)	(1UL << KVM_HPAGE_SHIFT(x))  #define KVM_HPAGE_MASK(x)	(~(KVM_HPAGE_SIZE(x) - 1))  #define KVM_PAGES_PER_HPAGE(x)	(KVM_HPAGE_SIZE(x) / PAGE_SIZE) @@ -69,8 +73,6 @@  #define IOPL_SHIFT 12 -#define KVM_ALIAS_SLOTS 4 -  #define KVM_PERMILLE_MMU_PAGES 20  
#define KVM_MIN_ALLOC_MMU_PAGES 64  #define KVM_MMU_HASH_SHIFT 10 @@ -241,7 +243,7 @@ struct kvm_mmu {  	void (*prefetch_page)(struct kvm_vcpu *vcpu,  			      struct kvm_mmu_page *page);  	int (*sync_page)(struct kvm_vcpu *vcpu, -			 struct kvm_mmu_page *sp); +			 struct kvm_mmu_page *sp, bool clear_unsync);  	void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva);  	hpa_t root_hpa;  	int root_level; @@ -301,8 +303,8 @@ struct kvm_vcpu_arch {  		unsigned long mmu_seq;  	} update_pte; -	struct i387_fxsave_struct host_fx_image; -	struct i387_fxsave_struct guest_fx_image; +	struct fpu guest_fpu; +	u64 xcr0;  	gva_t mmio_fault_cr2;  	struct kvm_pio_request pio; @@ -360,26 +362,11 @@ struct kvm_vcpu_arch {  	/* fields used by HYPER-V emulation */  	u64 hv_vapic; -}; - -struct kvm_mem_alias { -	gfn_t base_gfn; -	unsigned long npages; -	gfn_t target_gfn; -#define KVM_ALIAS_INVALID     1UL -	unsigned long flags; -}; -#define KVM_ARCH_HAS_UNALIAS_INSTANTIATION - -struct kvm_mem_aliases { -	struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; -	int naliases; +	cpumask_var_t wbinvd_dirty_mask;  };  struct kvm_arch { -	struct kvm_mem_aliases *aliases; -  	unsigned int n_free_mmu_pages;  	unsigned int n_requested_mmu_pages;  	unsigned int n_alloc_mmu_pages; @@ -533,6 +520,8 @@ struct kvm_x86_ops {  	void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry); +	bool (*has_wbinvd_exit)(void); +  	const struct trace_print_flags *exit_reasons_str;  }; @@ -576,7 +565,6 @@ enum emulation_result {  #define EMULTYPE_SKIP		    (1 << 2)  int emulate_instruction(struct kvm_vcpu *vcpu,  			unsigned long cr2, u16 error_code, int emulation_type); -void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context);  void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);  void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); @@ -591,10 +579,7 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);  int kvm_emulate_halt(struct kvm_vcpu *vcpu);  int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address);  int emulate_clts(struct kvm_vcpu *vcpu); -int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, -		    unsigned long *dest); -int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, -		    unsigned long value); +int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);  void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);  int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); @@ -602,15 +587,16 @@ int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);  int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,  		    bool has_error_code, u32 error_code); -void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); -void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); -void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); +int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); +int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); +int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);  void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8);  int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val);  int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val);  unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu);  void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw);  void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); +int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr);  int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 
msr, u64 *pdata);
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data);
@@ -630,12 +616,7 @@ int kvm_pic_set_irq(void *opaque, int irq, int level);

 void kvm_inject_nmi(struct kvm_vcpu *vcpu);

-void fx_init(struct kvm_vcpu *vcpu);
-
-int emulator_write_emulated(unsigned long addr,
-			    const void *val,
-			    unsigned int bytes,
-			    struct kvm_vcpu *vcpu);
+int fx_init(struct kvm_vcpu *vcpu);

 void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
@@ -664,8 +645,6 @@ void kvm_disable_tdp(void);
 int complete_pio(struct kvm_vcpu *vcpu);

 bool kvm_check_iopl(struct kvm_vcpu *vcpu);

-struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn);
-
 static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
 {
 	struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
@@ -719,21 +698,6 @@ static inline unsigned long read_msr(unsigned long msr)
 }
 #endif

-static inline void kvm_fx_save(struct i387_fxsave_struct *image)
-{
-	asm("fxsave (%0)":: "r" (image));
-}
-
-static inline void kvm_fx_restore(struct i387_fxsave_struct *image)
-{
-	asm("fxrstor (%0)":: "r" (image));
-}
-
-static inline void kvm_fx_finit(void)
-{
-	asm("finit");
-}
-
 static inline u32 get_rdx_init_val(void)
 {
 	return 0x600; /* P6 family */
diff --git a/arch/x86/include/asm/local64.h b/arch/x86/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/x86/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index f32a4301c4d4..c62c13cb9788 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -38,6 +38,10 @@
 #define MCM_ADDR_MEM	 3	/* memory address */
 #define MCM_ADDR_GENERIC 7	/* generic */

+/* CTL2 register defines */
+#define MCI_CTL2_CMCI_EN		(1ULL << 30)
+#define MCI_CTL2_CMCI_THRESHOLD_MASK	0x7fffULL
+
 #define MCJ_CTX_MASK		3
 #define MCJ_CTX(flags)		((flags) & MCJ_CTX_MASK)
 #define MCJ_CTX_RANDOM		0    /* inject context: random */
diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h
index 451d30e7f62d..16350740edf6 100644
--- a/arch/x86/include/asm/mrst.h
+++ b/arch/x86/include/asm/mrst.h
@@ -13,6 +13,32 @@
 extern int pci_mrst_init(void);
 int __init sfi_parse_mrtc(struct sfi_table_header *table);

+/*
+ * Medfield is the follow-up to Moorestown; it combines the two-chip
+ * solution into one chip and also adds always-on, constant TSC and
+ * LAPIC timers. Medfield is the platform name, and Penwell is the chip
+ * name; we treat Medfield/Penwell as a variant of Moorestown. Penwell
+ * can be identified via MSRs.
+ */ +enum mrst_cpu_type { +	MRST_CPU_CHIP_LINCROFT = 1, +	MRST_CPU_CHIP_PENWELL, +}; + +extern enum mrst_cpu_type __mrst_cpu_chip; +static enum mrst_cpu_type mrst_identify_cpu(void) +{ +	return __mrst_cpu_chip; +} + +enum mrst_timer_options { +	MRST_TIMER_DEFAULT, +	MRST_TIMER_APBT_ONLY, +	MRST_TIMER_LAPIC_APBT, +}; + +extern enum mrst_timer_options mrst_timer_options; +  #define SFI_MTMR_MAX_NUM 8  #define SFI_MRTC_MAX	8 diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 8c7ae4318629..986f7790fdb2 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -20,6 +20,7 @@  #define _EFER_LMA		10 /* Long mode active (read-only) */  #define _EFER_NX		11 /* No execute enable */  #define _EFER_SVME		12 /* Enable virtualization */ +#define _EFER_LMSLE		13 /* Long Mode Segment Limit Enable */  #define _EFER_FFXSR		14 /* Enable Fast FXSAVE/FXRSTOR */  #define EFER_SCE		(1<<_EFER_SCE) @@ -27,6 +28,7 @@  #define EFER_LMA		(1<<_EFER_LMA)  #define EFER_NX			(1<<_EFER_NX)  #define EFER_SVME		(1<<_EFER_SVME) +#define EFER_LMSLE		(1<<_EFER_LMSLE)  #define EFER_FFXSR		(1<<_EFER_FFXSR)  /* Intel MSRs. Some also available on other CPUs */ @@ -94,9 +96,6 @@  #define MSR_IA32_MC0_CTL2		0x00000280  #define MSR_IA32_MCx_CTL2(x)		(MSR_IA32_MC0_CTL2 + (x)) -#define CMCI_EN			(1ULL << 30) -#define CMCI_THRESHOLD_MASK		0xffffULL -  #define MSR_P6_PERFCTR0			0x000000c1  #define MSR_P6_PERFCTR1			0x000000c2  #define MSR_P6_EVNTSEL0			0x00000186 @@ -159,8 +158,6 @@  #define MSR_K7_FID_VID_STATUS		0xc0010042  /* K6 MSRs */ -#define MSR_K6_EFER			0xc0000080 -#define MSR_K6_STAR			0xc0000081  #define MSR_K6_WHCR			0xc0000082  #define MSR_K6_UWCCR			0xc0000085  #define MSR_K6_EPMR			0xc0000086 @@ -224,12 +221,14 @@  #define MSR_IA32_THERM_CONTROL		0x0000019a  #define MSR_IA32_THERM_INTERRUPT	0x0000019b -#define THERM_INT_LOW_ENABLE		(1 << 0) -#define THERM_INT_HIGH_ENABLE		(1 << 1) +#define THERM_INT_HIGH_ENABLE		(1 << 0) +#define THERM_INT_LOW_ENABLE		(1 << 1) +#define THERM_INT_PLN_ENABLE		(1 << 24)  #define MSR_IA32_THERM_STATUS		0x0000019c  #define THERM_STATUS_PROCHOT		(1 << 0) +#define THERM_STATUS_POWER_LIMIT	(1 << 10)  #define MSR_THERM2_CTL			0x0000019d @@ -239,6 +238,19 @@  #define MSR_IA32_TEMPERATURE_TARGET	0x000001a2 +#define MSR_IA32_ENERGY_PERF_BIAS	0x000001b0 + +#define MSR_IA32_PACKAGE_THERM_STATUS		0x000001b1 + +#define PACKAGE_THERM_STATUS_PROCHOT		(1 << 0) +#define PACKAGE_THERM_STATUS_POWER_LIMIT	(1 << 10) + +#define MSR_IA32_PACKAGE_THERM_INTERRUPT	0x000001b2 + +#define PACKAGE_THERM_INT_HIGH_ENABLE		(1 << 0) +#define PACKAGE_THERM_INT_LOW_ENABLE		(1 << 1) +#define PACKAGE_THERM_INT_PLN_ENABLE		(1 << 24) +  /* MISC_ENABLE bits: architectural */  #define MSR_IA32_MISC_ENABLE_FAST_STRING	(1ULL << 0)  #define MSR_IA32_MISC_ENABLE_TCC		(1ULL << 1) diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index c5bc4c2d33f5..084ef95274cd 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -148,8 +148,8 @@ static inline unsigned long long native_read_pmc(int counter)  #define rdmsr(msr, val1, val2)					\  do {								\  	u64 __val = native_read_msr((msr));			\ -	(val1) = (u32)__val;					\ -	(val2) = (u32)(__val >> 32);				\ +	(void)((val1) = (u32)__val);				\ +	(void)((val2) = (u32)(__val >> 32));			\  } while (0)  static inline void wrmsr(unsigned msr, unsigned low, unsigned high) diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 93da9c3f3341..932f0f86b4b7 100644 --- 
a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -17,7 +17,9 @@ int do_nmi_callback(struct pt_regs *regs, int cpu);  extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);  extern int check_nmi_watchdog(void); +#if !defined(CONFIG_LOCKUP_DETECTOR)  extern int nmi_watchdog_enabled; +#endif  extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);  extern int reserve_perfctr_nmi(unsigned int);  extern void release_perfctr_nmi(unsigned int); diff --git a/arch/x86/include/asm/olpc_ofw.h b/arch/x86/include/asm/olpc_ofw.h new file mode 100644 index 000000000000..08fde475cb3b --- /dev/null +++ b/arch/x86/include/asm/olpc_ofw.h @@ -0,0 +1,31 @@ +#ifndef _ASM_X86_OLPC_OFW_H +#define _ASM_X86_OLPC_OFW_H + +/* index into the page table containing the entry OFW occupies */ +#define OLPC_OFW_PDE_NR 1022 + +#define OLPC_OFW_SIG 0x2057464F	/* aka "OFW " */ + +#ifdef CONFIG_OLPC_OPENFIRMWARE + +/* run an OFW command by calling into the firmware */ +#define olpc_ofw(name, args, res) \ +	__olpc_ofw((name), ARRAY_SIZE(args), args, ARRAY_SIZE(res), res) + +extern int __olpc_ofw(const char *name, int nr_args, const void **args, int nr_res, +		void **res); + +/* determine whether OFW is available and lives in the proper memory */ +extern void olpc_ofw_detect(void); + +/* install OFW's pde permanently into the kernel's pgtable */ +extern void setup_olpc_ofw_pgd(void); + +#else /* !CONFIG_OLPC_OPENFIRMWARE */ + +static inline void olpc_ofw_detect(void) { } +static inline void setup_olpc_ofw_pgd(void) { } + +#endif /* !CONFIG_OLPC_OPENFIRMWARE */ + +#endif /* _ASM_X86_OLPC_OFW_H */ diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index 625c3f0e741a..8ca82839288a 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -37,6 +37,13 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr,  #define __pa_nodebug(x)	__phys_addr_nodebug((unsigned long)(x))  /* __pa_symbol should be used for C visible symbols.     This seems to be the official gcc blessed way to do such arithmetic. */ +/* + * We need __phys_reloc_hide() here because gcc may assume that there is no + * overflow during __pa() calculation and can optimize it unexpectedly. + * Newer versions of gcc provide -fno-strict-overflow switch to handle this + * case properly. Once all supported versions of gcc understand it, we can + * remove this Voodoo magic stuff. (i.e. 
once gcc3.x is deprecated)
+ */
 #define __pa_symbol(x)	__pa(__phys_reloc_hide((unsigned long)(x)))

 #define __va(x)			((void *)((unsigned long)(x)+PAGE_OFFSET))
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index cd2a31dc5fb8..49c7219826f9 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -30,6 +30,7 @@
 #define PCI_HAS_IO_ECS		0x40000
 #define PCI_NOASSIGN_ROMS	0x80000
 #define PCI_ROOT_NO_CRS		0x100000
+#define PCI_NOASSIGN_BARS	0x200000

 extern unsigned int pci_probe;
 extern unsigned long pirq_table_addr;
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 254883d0c7e0..6e742cc4251b 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -68,8 +68,9 @@ union cpuid10_eax {
 union cpuid10_edx {
 	struct {
-		unsigned int num_counters_fixed:4;
-		unsigned int reserved:28;
+		unsigned int num_counters_fixed:5;
+		unsigned int bit_width_fixed:8;
+		unsigned int reserved:19;
 	} split;
 	unsigned int full;
 };
@@ -140,6 +141,19 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 extern unsigned long perf_misc_flags(struct pt_regs *regs);
 #define perf_misc_flags(regs)	perf_misc_flags(regs)

+#include <asm/stacktrace.h>
+
+/*
+ * We abuse bit 3 from flags to pass exact information, see perf_misc_flags
+ * and the comment with PERF_EFLAGS_EXACT.
+ */
+#define perf_arch_fetch_caller_regs(regs, __ip)		{	\
+	(regs)->ip = (__ip);					\
+	(regs)->bp = caller_frame_pointer();			\
+	(regs)->cs = __KERNEL_CS;				\
+	regs->flags = 0;					\
+}
+
 #else
 static inline void init_hw_perf_events(void)		{ }
 static inline void perf_events_lapic_init(void)	{ }
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index 64a8ebff06fc..def500776b16 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -19,7 +19,6 @@
 #define ARCH_P4_RESERVED_ESCR	(2) /* IQ_ESCR(0,1) not always present */
 #define ARCH_P4_MAX_ESCR	(ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR)
 #define ARCH_P4_MAX_CCCR	(18)
-#define ARCH_P4_MAX_COUNTER	(ARCH_P4_MAX_CCCR / 2)

 #define P4_ESCR_EVENT_MASK	0x7e000000U
 #define P4_ESCR_EVENT_SHIFT	25
@@ -71,10 +70,6 @@
 #define P4_CCCR_THRESHOLD(v)		((v) << P4_CCCR_THRESHOLD_SHIFT)
 #define P4_CCCR_ESEL(v)			((v) << P4_CCCR_ESCR_SELECT_SHIFT)

-/* Custom bits in reerved CCCR area */
-#define P4_CCCR_CACHE_OPS_MASK		0x0000003fU
-
-
 /* Non HT mask */
 #define P4_CCCR_MASK				\
 	(P4_CCCR_OVF			|	\
@@ -106,8 +101,7 @@
  * ESCR and CCCR but rather an only packed value should
  * be unpacked and written to a proper addresses
  *
- * the base idea is to pack as much info as
- * possible
+ * the base idea is to pack as much info as possible
  */
 #define p4_config_pack_escr(v)		(((u64)(v)) << 32)
 #define p4_config_pack_cccr(v)		(((u64)(v)) & 0xffffffffULL)
@@ -130,8 +124,6 @@
 		t;					\
 	})

-#define p4_config_unpack_cache_event(v)	(((u64)(v)) & P4_CCCR_CACHE_OPS_MASK)
-
 #define P4_CONFIG_HT_SHIFT		63
 #define P4_CONFIG_HT			(1ULL << P4_CONFIG_HT_SHIFT)
@@ -214,6 +206,12 @@ static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr)
 	return escr;
 }

+/*
+ * These are the events to be used in the "Event Select" field of the
+ * ESCR register; they act as unique keys that let the kernel determine
+ * which CCCR and COUNTER should be used to track an event
+ */
 enum P4_EVENTS {
 	P4_EVENT_TC_DELIVER_MODE,
 	P4_EVENT_BPU_FETCH_REQUEST,
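The packed-config scheme described above keeps the 32-bit ESCR value in the high half and the 32-bit CCCR value in the low half of a single u64; a standalone sketch of the round trip, mirroring the p4_config_pack_escr()/p4_config_pack_cccr() macros (illustrative only, hypothetical helper names):

	#include <stdint.h>

	/* same layout as p4_config_pack_escr()/p4_config_pack_cccr() */
	static inline uint64_t p4_pack(uint32_t escr, uint32_t cccr)
	{
		return ((uint64_t)escr << 32) | cccr;	/* escr high, cccr low */
	}

	static inline uint32_t p4_unpack_escr(uint64_t config)
	{
		return (uint32_t)(config >> 32);
	}

	static inline uint32_t p4_unpack_cccr(uint64_t config)
	{
		return (uint32_t)config;
	}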
@@ -561,7 +559,7 @@ enum P4_EVENT_OPCODES {
  * a caller should use P4_ESCR_EMASK_NAME helper to
  * pick the EventMask needed, for example
  *
- *	P4_ESCR_EMASK_NAME(P4_EVENT_TC_DELIVER_MODE, DD)
+ *	P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD)
  */
 enum P4_ESCR_EMASKS {
 	P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DD, 0),
@@ -753,43 +751,50 @@ enum P4_ESCR_EMASKS {
 	P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, BOGUS, 1),
 };

-/* P4 PEBS: stale for a while */
-#define P4_PEBS_METRIC_MASK	0x00001fffU
-#define P4_PEBS_UOB_TAG		0x01000000U
-#define P4_PEBS_ENABLE		0x02000000U
-
-/* Replay metrics for MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT */
-#define P4_PEBS__1stl_cache_load_miss_retired	0x3000001
-#define P4_PEBS__2ndl_cache_load_miss_retired	0x3000002
-#define P4_PEBS__dtlb_load_miss_retired		0x3000004
-#define P4_PEBS__dtlb_store_miss_retired	0x3000004
-#define P4_PEBS__dtlb_all_miss_retired		0x3000004
-#define P4_PEBS__tagged_mispred_branch		0x3018000
-#define P4_PEBS__mob_load_replay_retired	0x3000200
-#define P4_PEBS__split_load_retired		0x3000400
-#define P4_PEBS__split_store_retired		0x3000400
-
-#define P4_VERT__1stl_cache_load_miss_retired	0x0000001
-#define P4_VERT__2ndl_cache_load_miss_retired	0x0000001
-#define P4_VERT__dtlb_load_miss_retired		0x0000001
-#define P4_VERT__dtlb_store_miss_retired	0x0000002
-#define P4_VERT__dtlb_all_miss_retired		0x0000003
-#define P4_VERT__tagged_mispred_branch		0x0000010
-#define P4_VERT__mob_load_replay_retired	0x0000001
-#define P4_VERT__split_load_retired		0x0000001
-#define P4_VERT__split_store_retired		0x0000002
-
-enum P4_CACHE_EVENTS {
-	P4_CACHE__NONE,
-
-	P4_CACHE__1stl_cache_load_miss_retired,
-	P4_CACHE__2ndl_cache_load_miss_retired,
-	P4_CACHE__dtlb_load_miss_retired,
-	P4_CACHE__dtlb_store_miss_retired,
-	P4_CACHE__itlb_reference_hit,
-	P4_CACHE__itlb_reference_miss,
-
-	P4_CACHE__MAX
+/*
+ * P4 PEBS specifics (Replay Event only)
+ *
+ * Format (bits):
+ *   0-6: metric from P4_PEBS_METRIC enum
+ *    7 : reserved
+ *    8 : reserved
+ * 9-11 : reserved
+ *
+ * Note: the UOP and PEBS bits are reserved for now,
+ * in case we need them later
+ */
+#define P4_PEBS_CONFIG_ENABLE		(1 << 7)
+#define P4_PEBS_CONFIG_UOP_TAG		(1 << 8)
+#define P4_PEBS_CONFIG_METRIC_MASK	0x3f
+#define P4_PEBS_CONFIG_MASK		0xff
+
+/*
+ * mem: Only counters MSR_IQ_COUNTER4 (16) and
+ * MSR_IQ_COUNTER5 (17) are allowed for PEBS sampling
+ */
+#define P4_PEBS_ENABLE			0x02000000U
+#define P4_PEBS_ENABLE_UOP_TAG		0x01000000U
+
+#define p4_config_unpack_metric(v)	(((u64)(v)) & P4_PEBS_CONFIG_METRIC_MASK)
+#define p4_config_unpack_pebs(v)	(((u64)(v)) & P4_PEBS_CONFIG_MASK)
+
+#define p4_config_pebs_has(v, mask)	(p4_config_unpack_pebs(v) & (mask))
+
+enum P4_PEBS_METRIC {
+	P4_PEBS_METRIC__none,
+
+	P4_PEBS_METRIC__1stl_cache_load_miss_retired,
+	P4_PEBS_METRIC__2ndl_cache_load_miss_retired,
+	P4_PEBS_METRIC__dtlb_load_miss_retired,
+	P4_PEBS_METRIC__dtlb_store_miss_retired,
+	P4_PEBS_METRIC__dtlb_all_miss_retired,
+	P4_PEBS_METRIC__tagged_mispred_branch,
+	P4_PEBS_METRIC__mob_load_replay_retired,
+	P4_PEBS_METRIC__split_load_retired,
+	P4_PEBS_METRIC__split_store_retired,
+
+	P4_PEBS_METRIC__max
 };

 #endif /* PERF_EVENT_P4_H */
+
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 181be528c612..076052cd62be 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -126,8 +126,8 @@ static inline int pgd_large(pgd_t pgd) { return 0; }

 /* x86-64 always has all page tables
mapped. */  #define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))  #define pte_offset_map_nested(dir, address) pte_offset_kernel((dir), (address)) -#define pte_unmap(pte) /* NOP */ -#define pte_unmap_nested(pte) /* NOP */ +#define pte_unmap(pte) ((void)(pte))/* NOP */ +#define pte_unmap_nested(pte) ((void)(pte)) /* NOP */  #define update_mmu_cache(vma, address, ptep) do { } while (0) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 7e5c6a60b8ee..325b7bdbebaa 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -762,6 +762,7 @@ extern void init_c1e_mask(void);  extern unsigned long		boot_option_idle_override;  extern unsigned long		idle_halt;  extern unsigned long		idle_nomwait; +extern bool			c1e_detected;  /*   * on systems with caches, caches must be flashed as the absolute @@ -1025,4 +1026,24 @@ unsigned long calc_aperfmperf_ratio(struct aperfmperf *old,  	return ratio;  } +/* + * AMD errata checking + */ +#ifdef CONFIG_CPU_SUP_AMD +extern const int amd_erratum_383[]; +extern const int amd_erratum_400[]; +extern bool cpu_has_amd_erratum(const int *); + +#define AMD_LEGACY_ERRATUM(...)		{ -1, __VA_ARGS__, 0 } +#define AMD_OSVW_ERRATUM(osvw_id, ...)	{ osvw_id, __VA_ARGS__, 0 } +#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \ +	((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end)) +#define AMD_MODEL_RANGE_FAMILY(range)	(((range) >> 24) & 0xff) +#define AMD_MODEL_RANGE_START(range)	(((range) >> 12) & 0xfff) +#define AMD_MODEL_RANGE_END(range)	((range) & 0xfff) + +#else +#define cpu_has_amd_erratum(x)	(false) +#endif /* CONFIG_CPU_SUP_AMD */ +  #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index 64cf2d24fad1..6c7fc25f2c34 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -84,5 +84,7 @@  #define REQUIRED_MASK5	0  #define REQUIRED_MASK6	0  #define REQUIRED_MASK7	0 +#define REQUIRED_MASK8	0 +#define REQUIRED_MASK9	0  #endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index 606ede126972..d1e41b0f9b60 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -118,7 +118,7 @@ static inline void __down_read(struct rw_semaphore *sem)  {  	asm volatile("# beginning down_read\n\t"  		     LOCK_PREFIX _ASM_INC "(%1)\n\t" -		     /* adds 0x00000001, returns the old value */ +		     /* adds 0x00000001 */  		     "  jns        1f\n"  		     "  call call_rwsem_down_read_failed\n"  		     "1:\n\t" @@ -156,11 +156,9 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)  static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)  {  	rwsem_count_t tmp; - -	tmp = RWSEM_ACTIVE_WRITE_BIAS;  	asm volatile("# beginning down_write\n\t"  		     LOCK_PREFIX "  xadd      %1,(%2)\n\t" -		     /* subtract 0x0000ffff, returns the old value */ +		     /* adds 0xffff0001, returns the old value */  		     "  test      %1,%1\n\t"  		     /* was the count 0 before? 
*/  		     "  jz        1f\n" @@ -168,7 +166,7 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)  		     "1:\n"  		     "# ending down_write"  		     : "+m" (sem->count), "=d" (tmp) -		     : "a" (sem), "1" (tmp) +		     : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS)  		     : "memory", "cc");  } @@ -195,16 +193,16 @@ static inline int __down_write_trylock(struct rw_semaphore *sem)   */  static inline void __up_read(struct rw_semaphore *sem)  { -	rwsem_count_t tmp = -RWSEM_ACTIVE_READ_BIAS; +	rwsem_count_t tmp;  	asm volatile("# beginning __up_read\n\t"  		     LOCK_PREFIX "  xadd      %1,(%2)\n\t"  		     /* subtracts 1, returns the old value */  		     "  jns        1f\n\t" -		     "  call call_rwsem_wake\n" +		     "  call call_rwsem_wake\n" /* expects old value in %edx */  		     "1:\n"  		     "# ending __up_read\n"  		     : "+m" (sem->count), "=d" (tmp) -		     : "a" (sem), "1" (tmp) +		     : "a" (sem), "1" (-RWSEM_ACTIVE_READ_BIAS)  		     : "memory", "cc");  } @@ -216,10 +214,9 @@ static inline void __up_write(struct rw_semaphore *sem)  	rwsem_count_t tmp;  	asm volatile("# beginning __up_write\n\t"  		     LOCK_PREFIX "  xadd      %1,(%2)\n\t" -		     /* tries to transition -			0xffff0001 -> 0x00000000 */ -		     "  jz       1f\n" -		     "  call call_rwsem_wake\n" +		     /* subtracts 0xffff0001, returns the old value */ +		     "  jns        1f\n\t" +		     "  call call_rwsem_wake\n" /* expects old value in %edx */  		     "1:\n\t"  		     "# ending __up_write\n"  		     : "+m" (sem->count), "=d" (tmp) diff --git a/arch/x86/include/asm/scatterlist.h b/arch/x86/include/asm/scatterlist.h index fb0b1874396f..4240878b9d76 100644 --- a/arch/x86/include/asm/scatterlist.h +++ b/arch/x86/include/asm/scatterlist.h @@ -3,7 +3,6 @@  #include <asm-generic/scatterlist.h> -#define ISA_DMA_THRESHOLD (0x00ffffff)  #define ARCH_HAS_SG_CHAIN  #endif /* _ASM_X86_SCATTERLIST_H */ diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 86b1506f4179..ef292c792d74 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -82,7 +82,7 @@ void *extend_brk(size_t size, size_t align);   * executable.)   
*/  #define RESERVE_BRK(name,sz)						\ -	static void __section(.discard) __used				\ +	static void __section(.discard.text) __used			\  	__brk_reservation_fn_##name##__(void) {				\  		asm volatile (						\  			".pushsection .brk_reservation,\"aw\",@nobits;" \ diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 4dab78edbad9..2b16a2ad23dc 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -1,6 +1,13 @@ +/* + *  Copyright (C) 1991, 1992  Linus Torvalds + *  Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs + */ +  #ifndef _ASM_X86_STACKTRACE_H  #define _ASM_X86_STACKTRACE_H +#include <linux/uaccess.h> +  extern int kstack_depth_to_print;  struct thread_info; @@ -42,4 +49,46 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,  		unsigned long *stack, unsigned long bp,  		const struct stacktrace_ops *ops, void *data); +#ifdef CONFIG_X86_32 +#define STACKSLOTS_PER_LINE 8 +#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) +#else +#define STACKSLOTS_PER_LINE 4 +#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) +#endif + +extern void +show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, +		unsigned long *stack, unsigned long bp, char *log_lvl); + +extern void +show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, +		unsigned long *sp, unsigned long bp, char *log_lvl); + +extern unsigned int code_bytes; + +/* The form of the top of the frame on the stack */ +struct stack_frame { +	struct stack_frame *next_frame; +	unsigned long return_address; +}; + +struct stack_frame_ia32 { +    u32 next_frame; +    u32 return_address; +}; + +static inline unsigned long caller_frame_pointer(void) +{ +	struct stack_frame *frame; + +	get_bp(frame); + +#ifdef CONFIG_FRAME_POINTER +	frame = frame->next_frame; +#endif + +	return (unsigned long)frame; +} +  #endif /* _ASM_X86_STACKTRACE_H */ diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 3ad421784ae7..cb238526a9f1 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -18,13 +18,13 @@  #include <asm/ia32.h>  /* ia32/sys_ia32.c */ -asmlinkage long sys32_truncate64(char __user *, unsigned long, unsigned long); +asmlinkage long sys32_truncate64(const char __user *, unsigned long, unsigned long);  asmlinkage long sys32_ftruncate64(unsigned int, unsigned long, unsigned long); -asmlinkage long sys32_stat64(char __user *, struct stat64 __user *); -asmlinkage long sys32_lstat64(char __user *, struct stat64 __user *); +asmlinkage long sys32_stat64(const char __user *, struct stat64 __user *); +asmlinkage long sys32_lstat64(const char __user *, struct stat64 __user *);  asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *); -asmlinkage long sys32_fstatat(unsigned int, char __user *, +asmlinkage long sys32_fstatat(unsigned int, const char __user *,  			      struct stat64 __user *, int);  struct mmap_arg_struct32;  asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *); @@ -49,12 +49,12 @@ asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *, compat_size_t);  asmlinkage long sys32_rt_sigqueueinfo(int, int, compat_siginfo_t __user *);  asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); -asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32); +asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32);  asmlinkage long sys32_personality(unsigned long);  asmlinkage long sys32_sendfile(int, int, 
compat_off_t __user *, s32); -asmlinkage long sys32_execve(char __user *, compat_uptr_t __user *, +asmlinkage long sys32_execve(const char __user *, compat_uptr_t __user *,  			     compat_uptr_t __user *, struct pt_regs *);  asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *); @@ -80,4 +80,7 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs *);  /* ia32/ipc32.c */  asmlinkage long sys32_ipc(u32, int, int, int, compat_uptr_t, u32); + +asmlinkage long sys32_fanotify_mark(int, unsigned int, u32, u32, int, +				    const char __user *);  #endif /* _ASM_X86_SYS_IA32_H */ diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 5c044b43e9a7..feb2ff9bfc2d 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -23,7 +23,7 @@ long sys_iopl(unsigned int, struct pt_regs *);  /* kernel/process.c */  int sys_fork(struct pt_regs *);  int sys_vfork(struct pt_regs *); -long sys_execve(char __user *, char __user * __user *, +long sys_execve(const char __user *, char __user * __user *,  		char __user * __user *, struct pt_regs *);  long sys_clone(unsigned long, unsigned long, void __user *,  	       void __user *, struct pt_regs *); diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index e7f4d33c55ed..33ecc3ea8782 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -457,4 +457,11 @@ static __always_inline void rdtsc_barrier(void)  	alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);  } +/* + * We handle most unaligned accesses in hardware.  On the other hand + * unaligned DMA can be quite expensive on some Nehalem processors. + * + * Based on this we disable the IP header alignment in network drivers. + */ +#define NET_IP_ALIGN	0  #endif /* _ASM_X86_SYSTEM_H */ diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index beb9b5f8f8a4..b766a5e8ba0e 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -343,10 +343,13 @@  #define __NR_rt_tgsigqueueinfo	335  #define __NR_perf_event_open	336  #define __NR_recvmmsg		337 +#define __NR_fanotify_init	338 +#define __NR_fanotify_mark	339 +#define __NR_prlimit64		340  #ifdef __KERNEL__ -#define NR_syscalls 338 +#define NR_syscalls 341  #define __ARCH_WANT_IPC_PARSE_VERSION  #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index ff4307b0e81e..363e9b8a715b 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -663,6 +663,12 @@ __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)  __SYSCALL(__NR_perf_event_open, sys_perf_event_open)  #define __NR_recvmmsg				299  __SYSCALL(__NR_recvmmsg, sys_recvmmsg) +#define __NR_fanotify_init			300 +__SYSCALL(__NR_fanotify_init, sys_fanotify_init) +#define __NR_fanotify_mark			301 +__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark) +#define __NR_prlimit64				302 +__SYSCALL(__NR_prlimit64, sys_prlimit64)  #ifndef __NO_STUBS  #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index aa558ac0306e..42d412fd8b02 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -34,6 +34,7 @@   */  #define UV_ITEMS_PER_DESCRIPTOR		8 +/* the 'throttle' to prevent the hardware stay-busy bug */  #define MAX_BAU_CONCURRENT		3  #define UV_CPUS_PER_ACT_STATUS		32  #define UV_ACT_STATUS_MASK		0x3 @@ -45,10 +46,26 @@  #define UV_DESC_BASE_PNODE_SHIFT	49  #define 
UV_PAYLOADQ_PNODE_SHIFT		49
 #define UV_PTC_BASENAME			"sgi_uv/ptc_statistics"
+#define UV_BAU_BASENAME			"sgi_uv/bau_tunables"
+#define UV_BAU_TUNABLES_DIR		"sgi_uv"
+#define UV_BAU_TUNABLES_FILE		"bau_tunables"
+#define WHITESPACE			" \t\n"
 #define uv_physnodeaddr(x)		((__pa((unsigned long)(x)) & uv_mmask))
 #define UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT 15
 #define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT 16
-#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD 0x000000000bUL
+#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD 0x0000000009UL
+/* [19:16] SOFT_ACK timeout period  19: 1 is urgency 7  17:16 1 is multiplier */
+#define BAU_MISC_CONTROL_MULT_MASK 3
+
+#define UVH_AGING_PRESCALE_SEL 0x000000b000UL
+/* [30:28] URGENCY_7  an index into a table of times */
+#define BAU_URGENCY_7_SHIFT 28
+#define BAU_URGENCY_7_MASK 7
+
+#define UVH_TRANSACTION_TIMEOUT 0x000000b200UL
+/* [45:40] BAU - BAU transaction timeout select - a multiplier */
+#define BAU_TRANS_SHIFT 40
+#define BAU_TRANS_MASK 0x3f

 /*
  * bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1
@@ -59,24 +76,21 @@
 #define DESC_STATUS_SOURCE_TIMEOUT	3

 /*
- * source side threshholds at which message retries print a warning
- */
-#define SOURCE_TIMEOUT_LIMIT		20
-#define DESTINATION_TIMEOUT_LIMIT	20
-
-/*
- * misc. delays, in microseconds
+ * delay for 'plugged' timeout retries, in microseconds
  */
-#define THROTTLE_DELAY			10
-#define TIMEOUT_DELAY			10
-#define BIOS_TO				1000
-/* BIOS is assumed to set the destination timeout to 1003520 nanoseconds */
+#define PLUGGED_DELAY			10

 /*
  * threshholds at which to use IPI to free resources
  */
+/* after this # consecutive 'plugged' timeouts, use IPI to release resources */
 #define PLUGSB4RESET 100
-#define TIMEOUTSB4RESET 100
+/* after this many consecutive timeouts, use IPI to release resources */
+#define TIMEOUTSB4RESET 1
+/* after this many uses of IPI to release resources, give up the request */
+#define IPI_RESET_LIMIT 1
+/* after this # consecutive successes, bump up the throttle if it was lowered */
+#define COMPLETE_THRESHOLD 5

 /*
  * number of entries in the destination side payload queue
@@ -96,6 +110,13 @@
 #define FLUSH_COMPLETE			4

 /*
+ * tuning the action when the numalink network is extremely delayed
+ */
+#define CONGESTED_RESPONSE_US 1000 /* 'long' response time, in microseconds */
+#define CONGESTED_REPS 10 /* long delays averaged over this many broadcasts */
+#define CONGESTED_PERIOD 30 /* time for the bau to be disabled, in seconds */
+
+/*
 * Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor)
 * If the 'multilevel' flag in the header portion of the descriptor
 * has been set to 0, then endpoint multi-unicast mode is selected.
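The three CONGESTED_* constants implement a simple trip-wire: response times are averaged over CONGESTED_REPS broadcasts, and when the average exceeds CONGESTED_RESPONSE_US the BAU is disabled for CONGESTED_PERIOD seconds, falling back to IPIs. A standalone sketch of that test (the real logic lives in the tlb_uv code; the helper name and parameters here are illustrative):

	/* illustrative; assumes the CONGESTED_* tunables defined above */
	static int bau_trips_congested(long requests, long total_response_us)
	{
		if (requests < CONGESTED_REPS)
			return 0;	/* too few samples to judge */

		/* average 'long' response time over the sample window */
		return (total_response_us / requests) > CONGESTED_RESPONSE_US;
	}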
@@ -300,37 +321,16 @@ struct bau_payload_queue_entry {
 	/* bytes 24-31 */
 };

-/*
- * one per-cpu; to locate the software tables
- */
-struct bau_control {
-	struct bau_desc *descriptor_base;
+struct msg_desc {
+	struct bau_payload_queue_entry *msg;
+	int msg_slot;
+	int sw_ack_slot;
 	struct bau_payload_queue_entry *va_queue_first;
 	struct bau_payload_queue_entry *va_queue_last;
-	struct bau_payload_queue_entry *bau_msg_head;
-	struct bau_control *uvhub_master;
-	struct bau_control *socket_master;
-	unsigned long timeout_interval;
-	atomic_t active_descriptor_count;
-	int max_concurrent;
-	int max_concurrent_constant;
-	int retry_message_scans;
-	int plugged_tries;
-	int timeout_tries;
-	int ipi_attempts;
-	int conseccompletes;
-	short cpu;
-	short uvhub_cpu;
-	short uvhub;
-	short cpus_in_socket;
-	short cpus_in_uvhub;
-	unsigned short message_number;
-	unsigned short uvhub_quiesce;
-	short socket_acknowledge_count[DEST_Q_SIZE];
-	cycles_t send_message;
-	spinlock_t masks_lock;
-	spinlock_t uvhub_lock;
-	spinlock_t queue_lock;
+};
+
+struct reset_args {
+	int sender;
 };

 /*
@@ -344,18 +344,25 @@ struct ptc_stats {
 	unsigned long s_dtimeout; /* destination side timeouts */
 	unsigned long s_time; /* time spent in sending side */
 	unsigned long s_retriesok; /* successful retries */
-	unsigned long s_ntargcpu; /* number of cpus targeted */
-	unsigned long s_ntarguvhub; /* number of uvhubs targeted */
-	unsigned long s_ntarguvhub16; /* number of times >= 16 target hubs */
-	unsigned long s_ntarguvhub8; /* number of times >= 8 target hubs */
-	unsigned long s_ntarguvhub4; /* number of times >= 4 target hubs */
-	unsigned long s_ntarguvhub2; /* number of times >= 2 target hubs */
-	unsigned long s_ntarguvhub1; /* number of times == 1 target hub */
+	unsigned long s_ntargcpu; /* total number of cpus targeted */
+	unsigned long s_ntargself; /* times the sending cpu was targeted */
+	unsigned long s_ntarglocals; /* targets of cpus on the local blade */
+	unsigned long s_ntargremotes; /* targets of cpus on remote blades */
+	unsigned long s_ntarglocaluvhub; /* targets of the local hub */
+	unsigned long s_ntargremoteuvhub; /* remote hubs targeted */
+	unsigned long s_ntarguvhub; /* total number of uvhubs targeted */
+	unsigned long s_ntarguvhub16; /* number of times target hubs >= 16 */
+	unsigned long s_ntarguvhub8; /* number of times target hubs >= 8 */
+	unsigned long s_ntarguvhub4; /* number of times target hubs >= 4 */
+	unsigned long s_ntarguvhub2; /* number of times target hubs >= 2 */
+	unsigned long s_ntarguvhub1; /* number of times target hubs == 1 */
 	unsigned long s_resets_plug; /* ipi-style resets from plug state */
 	unsigned long s_resets_timeout; /* ipi-style resets from timeouts */
 	unsigned long s_busy; /* status stayed busy past s/w timer */
 	unsigned long s_throttles; /* waits in throttle */
 	unsigned long s_retry_messages; /* retry broadcasts */
+	unsigned long s_bau_reenabled; /* for bau enable/disable */
+	unsigned long s_bau_disabled; /* for bau enable/disable */
 	/* destination statistics */
 	unsigned long d_alltlb; /* times all tlb's on this cpu were flushed */
 	unsigned long d_onetlb; /* times just one tlb on this cpu was flushed */
@@ -370,6 +377,52 @@ struct ptc_stats {
 	unsigned long d_rcanceled; /* number of messages canceled by resets */
 };

+/*
+ * one per-cpu; to locate the software tables
+ */
+struct bau_control {
+	struct bau_desc *descriptor_base;
+	struct bau_payload_queue_entry *va_queue_first;
+	struct bau_payload_queue_entry *va_queue_last;
+	
struct bau_payload_queue_entry *bau_msg_head; +	struct bau_control *uvhub_master; +	struct bau_control *socket_master; +	struct ptc_stats *statp; +	unsigned long timeout_interval; +	unsigned long set_bau_on_time; +	atomic_t active_descriptor_count; +	int plugged_tries; +	int timeout_tries; +	int ipi_attempts; +	int conseccompletes; +	int baudisabled; +	int set_bau_off; +	short cpu; +	short uvhub_cpu; +	short uvhub; +	short cpus_in_socket; +	short cpus_in_uvhub; +	unsigned short message_number; +	unsigned short uvhub_quiesce; +	short socket_acknowledge_count[DEST_Q_SIZE]; +	cycles_t send_message; +	spinlock_t uvhub_lock; +	spinlock_t queue_lock; +	/* tunables */ +	int max_bau_concurrent; +	int max_bau_concurrent_constant; +	int plugged_delay; +	int plugsb4reset; +	int timeoutsb4reset; +	int ipi_reset_limit; +	int complete_threshold; +	int congested_response_us; +	int congested_reps; +	int congested_period; +	cycles_t period_time; +	long period_requests; +}; +  static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp)  {  	return constant_test_bit(uvhub, &dstp->bits[0]); diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 9e6779f7cf2d..9f0cbd987d50 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -257,6 +257,7 @@ enum vmcs_field {  #define EXIT_REASON_IO_INSTRUCTION      30  #define EXIT_REASON_MSR_READ            31  #define EXIT_REASON_MSR_WRITE           32 +#define EXIT_REASON_INVALID_STATE	33  #define EXIT_REASON_MWAIT_INSTRUCTION   36  #define EXIT_REASON_MONITOR_INSTRUCTION 39  #define EXIT_REASON_PAUSE_INSTRUCTION   40 @@ -266,6 +267,7 @@ enum vmcs_field {  #define EXIT_REASON_EPT_VIOLATION       48  #define EXIT_REASON_EPT_MISCONFIG       49  #define EXIT_REASON_WBINVD		54 +#define EXIT_REASON_XSETBV		55  /*   * Interruption-information format @@ -375,6 +377,9 @@ enum vmcs_field {  #define VMX_EPT_EXTENT_CONTEXT_BIT		(1ull << 25)  #define VMX_EPT_EXTENT_GLOBAL_BIT		(1ull << 26) +#define VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT      (1ull << 9) /* (41 - 32) */ +#define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT      (1ull << 10) /* (42 - 32) */ +  #define VMX_EPT_DEFAULT_GAW			3  #define VMX_EPT_MAX_GAW				0x4  #define VMX_EPT_MT_EPTE_SHIFT			3 diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 9c371e4a9fa6..7fda040a76cd 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -417,6 +417,12 @@ HYPERVISOR_nmi_op(unsigned long op, unsigned long arg)  	return _hypercall2(int, nmi_op, op, arg);  } +static inline unsigned long __must_check +HYPERVISOR_hvm_op(int op, void *arg) +{ +       return _hypercall2(unsigned long, hvm_op, op, arg); +} +  static inline void  MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)  { diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 018a0a400799..bf5f7d32bd08 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -112,13 +112,9 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine)   */  static inline unsigned long mfn_to_local_pfn(unsigned long mfn)  { -	extern unsigned long max_mapnr;  	unsigned long pfn = mfn_to_pfn(mfn); -	if ((pfn < max_mapnr) -	    && !xen_feature(XENFEAT_auto_translated_physmap) -	    && (get_phys_to_machine(pfn) != mfn)) -		return max_mapnr; /* force !pfn_valid() */ -	/* XXX fixme; not true with sparsemem */ +	if (get_phys_to_machine(pfn) != mfn) +		return -1; /* force !pfn_valid() */  	return pfn;  } diff --git 
a/arch/x86/include/asm/xen/swiotlb-xen.h b/arch/x86/include/asm/xen/swiotlb-xen.h new file mode 100644 index 000000000000..1be1ab7d6a41 --- /dev/null +++ b/arch/x86/include/asm/xen/swiotlb-xen.h @@ -0,0 +1,14 @@ +#ifndef _ASM_X86_SWIOTLB_XEN_H +#define _ASM_X86_SWIOTLB_XEN_H + +#ifdef CONFIG_SWIOTLB_XEN +extern int xen_swiotlb; +extern int __init pci_xen_swiotlb_detect(void); +extern void __init pci_xen_swiotlb_init(void); +#else +#define xen_swiotlb (0) +static inline int __init pci_xen_swiotlb_detect(void) { return 0; } +static inline void __init pci_xen_swiotlb_init(void) { } +#endif + +#endif /* _ASM_X86_SWIOTLB_XEN_H */ diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 2c4390cae228..c6ce2452f10c 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -3,7 +3,8 @@  #include <linux/types.h>  #include <asm/processor.h> -#include <asm/i387.h> + +#define XSTATE_CPUID		0x0000000d  #define XSTATE_FP	0x1  #define XSTATE_SSE	0x2 @@ -13,6 +14,12 @@  #define FXSAVE_SIZE	512 +#define XSAVE_HDR_SIZE	    64 +#define XSAVE_HDR_OFFSET    FXSAVE_SIZE + +#define XSAVE_YMM_SIZE	    256 +#define XSAVE_YMM_OFFSET    (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) +  /*   * These are the features that the OS can handle currently.   */ @@ -26,10 +33,8 @@  extern unsigned int xstate_size;  extern u64 pcntxt_mask; -extern struct xsave_struct *init_xstate_buf;  extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; -extern void xsave_cntxt_init(void);  extern void xsave_init(void);  extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);  extern int init_fpu(struct task_struct *child); @@ -59,6 +64,16 @@ static inline int fpu_xrstor_checking(struct fpu *fpu)  static inline int xsave_user(struct xsave_struct __user *buf)  {  	int err; + +	/* +	 * Clear the xsave header first, so that reserved fields are +	 * initialized to zero. +	 */ +	err = __clear_user(&buf->xsave_hdr, +			   sizeof(struct xsave_hdr_struct)); +	if (unlikely(err)) +		return -EFAULT; +  	__asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x27\n"  			     "2:\n"  			     ".section .fixup,\"ax\"\n" @@ -111,12 +126,25 @@ static inline void xrstor_state(struct xsave_struct *fx, u64 mask)  		     :   "memory");  } +static inline void xsave_state(struct xsave_struct *fx, u64 mask) +{ +	u32 lmask = mask; +	u32 hmask = mask >> 32; + +	asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t" +		     : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) +		     :   "memory"); +} +  static inline void fpu_xsave(struct fpu *fpu)  {  	/* This, however, we can work around by forcing the compiler to select  	   an addressing mode that doesn't require extended registers. 
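When X86_FEATURE_XSAVEOPT is available, the alternative below also patches the xsave opcode (0x0f,0xae,0x27) into xsaveopt (0x0f,0xae,0x37), which should let the hardware skip writing state that is unmodified since the last restore.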
*/ -	__asm__ __volatile__(".byte " REX_PREFIX "0x0f,0xae,0x27" -			     : : "D" (&(fpu->state->xsave)), -				 "a" (-1), "d"(-1) : "memory"); +	alternative_input( +		".byte " REX_PREFIX "0x0f,0xae,0x27", +		".byte " REX_PREFIX "0x0f,0xae,0x37", +		X86_FEATURE_XSAVEOPT, +		[fx] "D" (&fpu->state->xsave), "a" (-1), "d" (-1) : +		"memory");  }  #endif diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index e77b22083721..0925676266bd 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -104,6 +104,7 @@ obj-$(CONFIG_SCx200)		+= scx200.o  scx200-y			+= scx200_32.o  obj-$(CONFIG_OLPC)		+= olpc.o +obj-$(CONFIG_OLPC_OPENFIRMWARE)	+= olpc_ofw.o  obj-$(CONFIG_X86_MRST)		+= mrst.o  microcode-y				:= microcode_core.o diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S index 580b4e296010..28595d6df47c 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.S @@ -104,7 +104,7 @@ _start:  	movl	%eax, %ecx  	orl	%edx, %ecx  	jz	1f -	movl	$0xc0000080, %ecx +	movl	$MSR_EFER, %ecx  	wrmsr  1: diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index fcc3c61fdecc..33cec152070d 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -2,7 +2,7 @@   * sleep.c - x86-specific ACPI sleep support.   *   *  Copyright (C) 2001-2003 Patrick Mochel - *  Copyright (C) 2001-2003 Pavel Machek <pavel@suse.cz> + *  Copyright (C) 2001-2003 Pavel Machek <pavel@ucw.cz>   */  #include <linux/acpi.h> diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 70237732a6c7..f65ab8b014c4 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -214,6 +214,7 @@ void __init_or_module apply_alternatives(struct alt_instr *start,  		u8 *instr = a->instr;  		BUG_ON(a->replacementlen > a->instrlen);  		BUG_ON(a->instrlen > sizeof(insnbuf)); +		BUG_ON(a->cpuid >= NCAPINTS*32);  		if (!boot_cpu_has(a->cpuid))  			continue;  #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0d20286d78c6..fa044e1e30a2 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -2572,6 +2572,11 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,  static int amd_iommu_domain_has_cap(struct iommu_domain *domain,  				    unsigned long cap)  { +	switch (cap) { +	case IOMMU_CAP_CACHE_COHERENCY: +		return 1; +	} +  	return 0;  } @@ -2609,8 +2614,7 @@ int __init amd_iommu_init_passthrough(void)  	pt_domain->mode |= PAGE_MODE_NONE; -	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - +	for_each_pci_dev(dev) {  		if (!check_device(&dev->dev))  			continue; diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index a35347501d36..8dd77800ff5d 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c @@ -43,10 +43,11 @@  #include <asm/fixmap.h>  #include <asm/apb_timer.h> +#include <asm/mrst.h>  #define APBT_MASK			CLOCKSOURCE_MASK(32)  #define APBT_SHIFT			22 -#define APBT_CLOCKEVENT_RATING		150 +#define APBT_CLOCKEVENT_RATING		110  #define APBT_CLOCKSOURCE_RATING		250  #define APBT_MIN_DELTA_USEC		200 @@ -83,8 +84,6 @@ struct apbt_dev {  	char name[10];  }; -int disable_apbt_percpu __cpuinitdata; -  static DEFINE_PER_CPU(struct apbt_dev, cpu_apbt_dev);  #ifdef CONFIG_SMP @@ -195,29 +194,6 @@ static struct clock_event_device apbt_clockevent = {  };  /* - * if user does not want to use per CPU apb timer, just give it a lower rating - 
* than local apic timer and skip the late per cpu timer init. - */ -static inline int __init setup_x86_mrst_timer(char *arg) -{ -	if (!arg) -		return -EINVAL; - -	if (strcmp("apbt_only", arg) == 0) -		disable_apbt_percpu = 0; -	else if (strcmp("lapic_and_apbt", arg) == 0) -		disable_apbt_percpu = 1; -	else { -		pr_warning("X86 MRST timer option %s not recognised" -			   " use x86_mrst_timer=apbt_only or lapic_and_apbt\n", -			   arg); -		return -EINVAL; -	} -	return 0; -} -__setup("x86_mrst_timer=", setup_x86_mrst_timer); - -/*   * start count down from 0xffff_ffff. this is done by toggling the enable bit   * then load initial load count to ~0.   */ @@ -335,7 +311,7 @@ static int __init apbt_clockevent_register(void)  	adev->num = smp_processor_id();  	memcpy(&adev->evt, &apbt_clockevent, sizeof(struct clock_event_device)); -	if (disable_apbt_percpu) { +	if (mrst_timer_options == MRST_TIMER_LAPIC_APBT) {  		apbt_clockevent.rating = APBT_CLOCKEVENT_RATING - 100;  		global_clock_event = &adev->evt;  		printk(KERN_DEBUG "%s clockevent registered as global\n", @@ -429,7 +405,8 @@ static int apbt_cpuhp_notify(struct notifier_block *n,  static __init int apbt_late_init(void)  { -	if (disable_apbt_percpu || !apb_timer_block_enabled) +	if (mrst_timer_options == MRST_TIMER_LAPIC_APBT || +		!apb_timer_block_enabled)  		return 0;  	/* This notifier should be called after workqueue is ready */  	hotcpu_notifier(apbt_cpuhp_notify, -20); @@ -450,6 +427,8 @@ static void apbt_set_mode(enum clock_event_mode mode,  	int timer_num;  	struct apbt_dev *adev = EVT_TO_APBT_DEV(evt); +	BUG_ON(!apbt_virt_address); +  	timer_num = adev->num;  	pr_debug("%s CPU %d timer %d mode=%d\n",  		 __func__, first_cpu(*evt->cpumask), timer_num, mode); @@ -676,7 +655,7 @@ void __init apbt_time_init(void)  	}  #ifdef CONFIG_SMP  	/* kernel cmdline disable apb timer, so we will use lapic timers */ -	if (disable_apbt_percpu) { +	if (mrst_timer_options == MRST_TIMER_LAPIC_APBT) {  		printk(KERN_INFO "apbt: disabled per cpu timer\n");  		return;  	} diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index b5d8b0bcf235..a2e0caf26e17 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -280,7 +280,7 @@ void __init early_gart_iommu_check(void)  	 * or BIOS forget to put that in reserved.  	 * try to update e820 to make that region as reserved.  	 
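Only the aperture order is used below; the aperture base returned by search_agp_bridge() is not needed here.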
*/ -	u32 agp_aper_base = 0, agp_aper_order = 0; +	u32 agp_aper_order = 0;  	int i, fix, slot, valid_agp = 0;  	u32 ctl;  	u32 aper_size = 0, aper_order = 0, last_aper_order = 0; @@ -291,7 +291,7 @@ void __init early_gart_iommu_check(void)  		return;  	/* This is mostly duplicate of iommu_hole_init */ -	agp_aper_base = search_agp_bridge(&agp_aper_order, &valid_agp); +	search_agp_bridge(&agp_aper_order, &valid_agp);  	fix = 0;  	for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index 565c1bfc507d..910f20b457c4 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -2,7 +2,12 @@  # Makefile for local APIC drivers and for the IO-APIC code  # -obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_noop.o probe_$(BITS).o ipi.o nmi.o +obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_noop.o probe_$(BITS).o ipi.o +ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y) +obj-$(CONFIG_X86_LOCAL_APIC)	+= nmi.o +endif +obj-$(CONFIG_HARDLOCKUP_DETECTOR)	+= hw_nmi.o +  obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o  obj-$(CONFIG_SMP)		+= ipi.o diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index a96489ee6cab..e3b534cda49a 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -460,7 +460,7 @@ static void lapic_timer_broadcast(const struct cpumask *mask)  }  /* - * Setup the local APIC timer for this CPU. Copy the initilized values + * Setup the local APIC timer for this CPU. Copy the initialized values   * of the boot CPU and register the clock event in the framework.   */  static void __cpuinit setup_APIC_timer(void) @@ -1606,7 +1606,7 @@ void __init init_apic_mappings(void)  		 * acpi lapic path already maps that address in  		 * acpi_register_lapic_address()  		 */ -		if (!acpi_lapic) +		if (!acpi_lapic && !smp_found_config)  			set_fixmap_nocache(FIX_APIC_BASE, apic_phys);  		apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n", diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 425e53a87feb..8593582d8022 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -129,7 +129,6 @@ int					es7000_plat;   * GSI override for ES7000 platforms.   */ -static unsigned int			base;  static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)  { diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c new file mode 100644 index 000000000000..cefd6942f0e9 --- /dev/null +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -0,0 +1,107 @@ +/* + *  HW NMI watchdog support + * + *  started by Don Zickus, Copyright (C) 2010 Red Hat, Inc. + * + *  Arch specific calls to support NMI watchdog + * + *  Bits copied from original nmi.c file + * + */ +#include <asm/apic.h> + +#include <linux/cpumask.h> +#include <linux/kdebug.h> +#include <linux/notifier.h> +#include <linux/kprobes.h> +#include <linux/nmi.h> +#include <linux/module.h> + +/* For reliability, we're prepared to waste bits here. 
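Sizing the bitmap for NR_CPUS rather than the online count keeps it valid for every possible CPU, at the cost of a few unused bits.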
*/ +static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; + +u64 hw_nmi_get_sample_period(void) +{ +	return (u64)(cpu_khz) * 1000 * 60; +} + +#ifdef ARCH_HAS_NMI_WATCHDOG +void arch_trigger_all_cpu_backtrace(void) +{ +	int i; + +	cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); + +	printk(KERN_INFO "sending NMI to all CPUs:\n"); +	apic->send_IPI_all(NMI_VECTOR); + +	/* Wait for up to 10 seconds for all CPUs to do the backtrace */ +	for (i = 0; i < 10 * 1000; i++) { +		if (cpumask_empty(to_cpumask(backtrace_mask))) +			break; +		mdelay(1); +	} +} + +static int __kprobes +arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, +			 unsigned long cmd, void *__args) +{ +	struct die_args *args = __args; +	struct pt_regs *regs; +	int cpu = smp_processor_id(); + +	switch (cmd) { +	case DIE_NMI: +	case DIE_NMI_IPI: +		break; + +	default: +		return NOTIFY_DONE; +	} + +	regs = args->regs; + +	if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { +		static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED; + +		arch_spin_lock(&lock); +		printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); +		show_regs(regs); +		dump_stack(); +		arch_spin_unlock(&lock); +		cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); +		return NOTIFY_STOP; +	} + +	return NOTIFY_DONE; +} + +static __read_mostly struct notifier_block backtrace_notifier = { +	.notifier_call          = arch_trigger_all_cpu_backtrace_handler, +	.next                   = NULL, +	.priority               = 1 +}; + +static int __init register_trigger_all_cpu_backtrace(void) +{ +	register_die_notifier(&backtrace_notifier); +	return 0; +} +early_initcall(register_trigger_all_cpu_backtrace); +#endif + +/* STUB calls to mimic old nmi_watchdog behaviour */ +#if defined(CONFIG_X86_LOCAL_APIC) +unsigned int nmi_watchdog = NMI_NONE; +EXPORT_SYMBOL(nmi_watchdog); +void acpi_nmi_enable(void) { return; } +void acpi_nmi_disable(void) { return; } +#endif +atomic_t nmi_active = ATOMIC_INIT(0);           /* oprofile uses this */ +EXPORT_SYMBOL(nmi_active); +int unknown_nmi_panic; +void cpu_nmi_set_wd_enabled(void) { return; } +void stop_apic_nmi_watchdog(void *unused) { return; } +void setup_apic_nmi_watchdog(void *unused) { return; } +int __init check_nmi_watchdog(void) { return 0; } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index e41ed24ab26d..4dc0084ec1b1 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3397,7 +3397,7 @@ static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)  	cfg = desc->chip_data; -	read_msi_msg_desc(desc, &msg); +	get_cached_msi_msg_desc(desc, &msg);  	msg.data &= ~MSI_DATA_VECTOR_MASK;  	msg.data |= MSI_DATA_VECTOR(cfg->vector); diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index 1edaf15c0b8e..a43f71cb30f8 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -401,13 +401,6 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)  	int cpu = smp_processor_id();  	int rc = 0; -	/* check for other users first */ -	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) -			== NOTIFY_STOP) { -		rc = 1; -		touched = 1; -	} -  	sum = get_timer_irqs(cpu);  	if (__get_cpu_var(nmi_touch)) { diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index e46f98f36e31..7b598b84c902 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -604,6 +604,10 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long 
reason, void *data)  {  	if (reason != DIE_NMI_IPI)  		return NOTIFY_OK; + +	if (in_crash_kexec) +		/* do nothing if entering the crash kernel */ +		return NOTIFY_OK;  	/*  	 * Use a lock so only one cpu prints at a time  	 * to prevent intermixed output. diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index c4f9182ca3ac..4c9c67bf09b7 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -140,7 +140,7 @@   *         is now the way life works).   *         Fix thinko in suspend() (wrong return).   *         Notify drivers on critical suspend. - *         Make kapmd absorb more idle time (Pavel Machek <pavel@suse.cz> + *         Make kapmd absorb more idle time (Pavel Machek <pavel@ucw.cz>   *         modified by sfr).   *         Disable interrupts while we are suspended (Andy Henroid   *         <andy_henroid@yahoo.com> fixed by sfr). diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 3a785da34b6f..3f0ebe429a01 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -12,11 +12,11 @@ endif  nostackp := $(call cc-option, -fno-stack-protector)  CFLAGS_common.o		:= $(nostackp) -obj-y			:= intel_cacheinfo.o addon_cpuid_features.o +obj-y			:= intel_cacheinfo.o scattered.o topology.o  obj-y			+= proc.o capflags.o powerflags.o common.o  obj-y			+= vmware.o hypervisor.o sched.o mshyperv.o -obj-$(CONFIG_X86_32)	+= bugs.o cmpxchg.o +obj-$(CONFIG_X86_32)	+= bugs.o  obj-$(CONFIG_X86_64)	+= bugs_64.o  obj-$(CONFIG_CPU_SUP_INTEL)		+= intel.o diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index e485825130d2..60a57b13082d 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -466,7 +466,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)  		}  	} -	if (c->x86 == 0x10 || c->x86 == 0x11) +	if (c->x86 >= 0x10)  		set_cpu_cap(c, X86_FEATURE_REP_GOOD);  	/* get apicid instead of initial apic id from cpuid */ @@ -529,7 +529,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)  			num_cache_leaves = 3;  	} -	if (c->x86 >= 0xf && c->x86 <= 0x11) +	if (c->x86 >= 0xf)  		set_cpu_cap(c, X86_FEATURE_K8);  	if (cpu_has_xmm2) { @@ -546,7 +546,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)  		fam10h_check_enable_mmcfg();  	} -	if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { +	if (c == &boot_cpu_data && c->x86 >= 0xf) {  		unsigned long long tseg;  		/* @@ -609,3 +609,74 @@ static const struct cpu_dev __cpuinitconst amd_cpu_dev = {  };  cpu_dev_register(amd_cpu_dev); + +/* + * AMD errata checking + * + * Errata are defined as arrays of ints using the AMD_LEGACY_ERRATUM() or + * AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that + * have an OSVW id assigned, which it takes as first argument. Both take a + * variable number of family-specific model-stepping ranges created by + * AMD_MODEL_RANGE(). Each erratum also has to be declared as extern const + * int[] in arch/x86/include/asm/processor.h. 
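+ * + * A caller would then simply test, e.g., cpu_has_amd_erratum(amd_erratum_400); the check consults the OSVW MSRs first, when X86_FEATURE_OSVW is set and the erratum carries an OSVW id, and falls back to the family/model-stepping ranges otherwise.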
+ * + * Example: + * + * const int amd_erratum_319[] = + *	AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2), + *			   AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0), + *			   AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0)); + */ + +const int amd_erratum_400[] = +	AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf), +			    AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf)); +EXPORT_SYMBOL_GPL(amd_erratum_400); + +const int amd_erratum_383[] = +	AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf)); +EXPORT_SYMBOL_GPL(amd_erratum_383); + +bool cpu_has_amd_erratum(const int *erratum) +{ +	struct cpuinfo_x86 *cpu = &current_cpu_data; +	int osvw_id = *erratum++; +	u32 range; +	u32 ms; + +	/* +	 * If called early enough that current_cpu_data hasn't been initialized +	 * yet, fall back to boot_cpu_data. +	 */ +	if (cpu->x86 == 0) +		cpu = &boot_cpu_data; + +	if (cpu->x86_vendor != X86_VENDOR_AMD) +		return false; + +	if (osvw_id >= 0 && osvw_id < 65536 && +	    cpu_has(cpu, X86_FEATURE_OSVW)) { +		u64 osvw_len; + +		rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, osvw_len); +		if (osvw_id < osvw_len) { +			u64 osvw_bits; + +			rdmsrl(MSR_AMD64_OSVW_STATUS + (osvw_id >> 6), +			    osvw_bits); +			return osvw_bits & (1ULL << (osvw_id & 0x3f)); +		} +	} + +	/* OSVW unavailable or ID unknown, match family-model-stepping range */ +	ms = (cpu->x86_model << 8) | cpu->x86_mask; +	while ((range = *erratum++)) +		if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) && +		    (ms >= AMD_MODEL_RANGE_START(range)) && +		    (ms <= AMD_MODEL_RANGE_END(range))) +			return true; + +	return false; +} + +EXPORT_SYMBOL_GPL(cpu_has_amd_erratum); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 68e4a6f2211e..490dac63c2d2 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -140,10 +140,18 @@ EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);  static int __init x86_xsave_setup(char *s)  {  	setup_clear_cpu_cap(X86_FEATURE_XSAVE); +	setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);  	return 1;  }  __setup("noxsave", x86_xsave_setup); +static int __init x86_xsaveopt_setup(char *s) +{ +	setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); +	return 1; +} +__setup("noxsaveopt", x86_xsaveopt_setup); +  #ifdef CONFIG_X86_32  static int cachesize_override __cpuinitdata = -1;  static int disable_x86_serial_nr __cpuinitdata = 1; @@ -551,6 +559,16 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)  		c->x86_capability[4] = excap;  	} +	/* Additional Intel-defined flags: level 0x00000007 */ +	if (c->cpuid_level >= 0x00000007) { +		u32 eax, ebx, ecx, edx; + +		cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); + +		if (eax > 0) +			c->x86_capability[9] = ebx; +	} +  	/* AMD-defined flags: level 0x80000001 */  	xlvl = cpuid_eax(0x80000000);  	c->extended_cpuid_level = xlvl; @@ -576,6 +594,7 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)  	if (c->extended_cpuid_level >= 0x80000007)  		c->x86_power = cpuid_edx(0x80000007); +	init_scattered_cpuid_features(c);  }  static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c) @@ -731,7 +750,6 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)  	get_model_name(c); /* Default name */ -	init_scattered_cpuid_features(c);  	detect_nopl(c);  } @@ -1192,6 +1210,7 @@ void __cpuinit cpu_init(void)  	dbg_restore_debug_regs();  	fpu_init(); +	xsave_init();  	raw_local_save_flags(kernel_eflags); @@ -1252,12 +1271,7 @@ void __cpuinit cpu_init(void)  	clear_used_math();  	mxcsr_feature_mask_init(); -	/* -	 * Boot processor to setup the FP 
and extended state context info. -	 */ -	if (smp_processor_id() == boot_cpu_id) -		init_thread_xstate(); - +	fpu_init();  	xsave_init();  }  #endif diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 1d3cddaa40ee..cd8da247dda1 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -34,7 +34,6 @@  #include <linux/compiler.h>  #include <linux/dmi.h>  #include <linux/slab.h> -#include <trace/events/power.h>  #include <linux/acpi.h>  #include <linux/io.h> @@ -73,7 +72,7 @@ struct acpi_cpufreq_data {  static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data);  /* acpi_perf_data is a pointer to percpu data. */ -static struct acpi_processor_performance *acpi_perf_data; +static struct acpi_processor_performance __percpu *acpi_perf_data;  static struct cpufreq_driver acpi_cpufreq_driver; @@ -324,8 +323,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,  		}  	} -	trace_power_frequency(POWER_PSTATE, data->freq_table[next_state].frequency); -  	switch (data->cpu_feature) {  	case SYSTEM_INTEL_MSR_CAPABLE:  		cmd.type = SYSTEM_INTEL_MSR_CAPABLE; @@ -351,7 +348,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,  	freqs.old = perf->states[perf->state].core_frequency * 1000;  	freqs.new = data->freq_table[next_state].frequency; -	for_each_cpu(i, cmd.mask) { +	for_each_cpu(i, policy->cpus) {  		freqs.cpu = i;  		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);  	} @@ -367,7 +364,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,  		}  	} -	for_each_cpu(i, cmd.mask) { +	for_each_cpu(i, policy->cpus) {  		freqs.cpu = i;  		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);  	} diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c index 16e3483be9e3..32974cf84232 100644 --- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c +++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c @@ -169,12 +169,9 @@ static int gx_freq_mult[16] = {   *	Low Level chipset interface				*   ****************************************************************/  static struct pci_device_id gx_chipset_tbl[] __initdata = { -	{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, -		PCI_ANY_ID, PCI_ANY_ID }, -	{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, -		PCI_ANY_ID, PCI_ANY_ID }, -	{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, -		PCI_ANY_ID, PCI_ANY_ID }, +	{ PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY), }, +	{ PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5520), }, +	{ PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5510), },  	{ 0, },  }; @@ -199,7 +196,7 @@ static __init struct pci_dev *gx_detect_chipset(void)  	}  	/* detect which companion chip is used */ -	while ((gx_pci = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, gx_pci)) != NULL) { +	for_each_pci_dev(gx_pci) {  		if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL)  			return gx_pci;  	} diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index 7e7eea4f8261..03162dac6271 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -426,7 +426,7 @@ static int guess_fsb(int mult)  } -static int __init longhaul_get_ranges(void) +static int __cpuinit longhaul_get_ranges(void)  {  	unsigned int i, j, k = 0;  	unsigned int ratio; @@ -530,7 +530,7 @@ static int __init longhaul_get_ranges(void)  } -static void __init longhaul_setup_voltagescaling(void) +static void __cpuinit longhaul_setup_voltagescaling(void)  {  	
union msr_longhaul longhaul;  	struct mV_pos minvid, maxvid, vid; @@ -784,7 +784,7 @@ static int longhaul_setup_southbridge(void)  	return 0;  } -static int __init longhaul_cpu_init(struct cpufreq_policy *policy) +static int __cpuinit longhaul_cpu_init(struct cpufreq_policy *policy)  {  	struct cpuinfo_x86 *c = &cpu_data(0);  	char *cpuname = NULL; diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.h b/arch/x86/kernel/cpu/cpufreq/longhaul.h index e2360a469f79..cbf48fbca881 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.h +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.h @@ -56,7 +56,7 @@ union msr_longhaul {  /*   * VIA C3 Samuel 1  & Samuel 2 (stepping 0)   */ -static const int __initdata samuel1_mults[16] = { +static const int __cpuinitdata samuel1_mults[16] = {  	-1, /* 0000 -> RESERVED */  	30, /* 0001 ->  3.0x */  	40, /* 0010 ->  4.0x */ @@ -75,7 +75,7 @@ static const int __initdata samuel1_mults[16] = {  	-1, /* 1111 -> RESERVED */  }; -static const int __initdata samuel1_eblcr[16] = { +static const int __cpuinitdata samuel1_eblcr[16] = {  	50, /* 0000 -> RESERVED */  	30, /* 0001 ->  3.0x */  	40, /* 0010 ->  4.0x */ @@ -97,7 +97,7 @@ static const int __initdata samuel1_eblcr[16] = {  /*   * VIA C3 Samuel2 Stepping 1->15   */ -static const int __initdata samuel2_eblcr[16] = { +static const int __cpuinitdata samuel2_eblcr[16] = {  	50,  /* 0000 ->  5.0x */  	30,  /* 0001 ->  3.0x */  	40,  /* 0010 ->  4.0x */ @@ -119,7 +119,7 @@ static const int __initdata samuel2_eblcr[16] = {  /*   * VIA C3 Ezra   */ -static const int __initdata ezra_mults[16] = { +static const int __cpuinitdata ezra_mults[16] = {  	100, /* 0000 -> 10.0x */  	30,  /* 0001 ->  3.0x */  	40,  /* 0010 ->  4.0x */ @@ -138,7 +138,7 @@ static const int __initdata ezra_mults[16] = {  	120, /* 1111 -> 12.0x */  }; -static const int __initdata ezra_eblcr[16] = { +static const int __cpuinitdata ezra_eblcr[16] = {  	50,  /* 0000 ->  5.0x */  	30,  /* 0001 ->  3.0x */  	40,  /* 0010 ->  4.0x */ @@ -160,7 +160,7 @@ static const int __initdata ezra_eblcr[16] = {  /*   * VIA C3 (Ezra-T) [C5M].   
*/ -static const int __initdata ezrat_mults[32] = { +static const int __cpuinitdata ezrat_mults[32] = {  	100, /* 0000 -> 10.0x */  	30,  /* 0001 ->  3.0x */  	40,  /* 0010 ->  4.0x */ @@ -196,7 +196,7 @@ static const int __initdata ezrat_mults[32] = {  	-1,  /* 1111 -> RESERVED (12.0x) */  }; -static const int __initdata ezrat_eblcr[32] = { +static const int __cpuinitdata ezrat_eblcr[32] = {  	50,  /* 0000 ->  5.0x */  	30,  /* 0001 ->  3.0x */  	40,  /* 0010 ->  4.0x */ @@ -235,7 +235,7 @@ static const int __initdata ezrat_eblcr[32] = {  /*   * VIA C3 Nehemiah */ -static const int __initdata  nehemiah_mults[32] = { +static const int __cpuinitdata nehemiah_mults[32] = {  	100, /* 0000 -> 10.0x */  	-1, /* 0001 -> 16.0x */  	40,  /* 0010 ->  4.0x */ @@ -270,7 +270,7 @@ static const int __initdata  nehemiah_mults[32] = {  	-1, /* 1111 -> 12.0x */  }; -static const int __initdata nehemiah_eblcr[32] = { +static const int __cpuinitdata nehemiah_eblcr[32] = {  	50,  /* 0000 ->  5.0x */  	160, /* 0001 -> 16.0x */  	40,  /* 0010 ->  4.0x */ @@ -315,7 +315,7 @@ struct mV_pos {  	unsigned short pos;  }; -static const struct mV_pos __initdata vrm85_mV[32] = { +static const struct mV_pos __cpuinitdata vrm85_mV[32] = {  	{1250, 8},	{1200, 6},	{1150, 4},	{1100, 2},  	{1050, 0},	{1800, 30},	{1750, 28},	{1700, 26},  	{1650, 24},	{1600, 22},	{1550, 20},	{1500, 18}, @@ -326,14 +326,14 @@ static const struct mV_pos __initdata vrm85_mV[32] = {  	{1475, 17},	{1425, 15},	{1375, 13},	{1325, 11}  }; -static const unsigned char __initdata mV_vrm85[32] = { +static const unsigned char __cpuinitdata mV_vrm85[32] = {  	0x04,	0x14,	0x03,	0x13,	0x02,	0x12,	0x01,	0x11,  	0x00,	0x10,	0x0f,	0x1f,	0x0e,	0x1e,	0x0d,	0x1d,  	0x0c,	0x1c,	0x0b,	0x1b,	0x0a,	0x1a,	0x09,	0x19,  	0x08,	0x18,	0x07,	0x17,	0x06,	0x16,	0x05,	0x15  }; -static const struct mV_pos __initdata mobilevrm_mV[32] = { +static const struct mV_pos __cpuinitdata mobilevrm_mV[32] = {  	{1750, 31},	{1700, 30},	{1650, 29},	{1600, 28},  	{1550, 27},	{1500, 26},	{1450, 25},	{1400, 24},  	{1350, 23},	{1300, 22},	{1250, 21},	{1200, 20}, @@ -344,7 +344,7 @@ static const struct mV_pos __initdata mobilevrm_mV[32] = {  	{675, 3},	{650, 2},	{625, 1},	{600, 0}  }; -static const unsigned char __initdata mV_mobilevrm[32] = { +static const unsigned char __cpuinitdata mV_mobilevrm[32] = {  	0x1f,	0x1e,	0x1d,	0x1c,	0x1b,	0x1a,	0x19,	0x18,  	0x17,	0x16,	0x15,	0x14,	0x13,	0x12,	0x11,	0x10,  	0x0f,	0x0e,	0x0d,	0x0c,	0x0b,	0x0a,	0x09,	0x08, diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c index e7b559d74c52..fc09f142d94d 100644 --- a/arch/x86/kernel/cpu/cpufreq/longrun.c +++ b/arch/x86/kernel/cpu/cpufreq/longrun.c @@ -165,8 +165,8 @@ static unsigned int longrun_get(unsigned int cpu)   * TMTA rules:   * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq)   */ -static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, -						   unsigned int *high_freq) +static unsigned int __cpuinit longrun_determine_freqs(unsigned int *low_freq, +						      unsigned int *high_freq)  {  	u32 msr_lo, msr_hi;  	u32 save_lo, save_hi; @@ -258,7 +258,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq,  } -static int __init longrun_cpu_init(struct cpufreq_policy *policy) +static int __cpuinit longrun_cpu_init(struct cpufreq_policy *policy)  {  	int result = 0; diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 7b8a8ba67b07..bd1cac747f67 100644 --- 
a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -178,13 +178,8 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)  		}  	} -	if (c->x86 != 0xF) { -		if (!cpu_has(c, X86_FEATURE_EST)) -			printk(KERN_WARNING PFX "Unknown CPU. " -				"Please send an e-mail to " -				"<cpufreq@vger.kernel.org>\n"); +	if (c->x86 != 0xF)  		return 0; -	}  	/* on P-4s, the TSC runs with constant frequency independent whether  	 * throttling is active or not. */ diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c index a36de5bbb622..994230d4dc4e 100644 --- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c @@ -110,7 +110,7 @@ struct pcc_cpu {  	u32 output_offset;  }; -static struct pcc_cpu *pcc_cpu_info; +static struct pcc_cpu __percpu *pcc_cpu_info;  static int pcc_cpufreq_verify(struct cpufreq_policy *policy)  { diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index 9a97116f89e5..4a45fd6e41ba 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -569,7 +569,7 @@ static int powernow_verify(struct cpufreq_policy *policy)   * We will then get the same kind of behaviour already tested under   * the "well-known" other OS.   */ -static int __init fixup_sgtc(void) +static int __cpuinit fixup_sgtc(void)  {  	unsigned int sgtc;  	unsigned int m; @@ -603,7 +603,7 @@ static unsigned int powernow_get(unsigned int cpu)  } -static int __init acer_cpufreq_pst(const struct dmi_system_id *d) +static int __cpuinit acer_cpufreq_pst(const struct dmi_system_id *d)  {  	printk(KERN_WARNING PFX  		"%s laptop with broken PST tables in BIOS detected.\n", @@ -621,7 +621,7 @@ static int __init acer_cpufreq_pst(const struct dmi_system_id *d)   * A BIOS update is all that can save them.   * Mention this, and disable cpufreq.   */ -static struct dmi_system_id __initdata powernow_dmi_table[] = { +static struct dmi_system_id __cpuinitdata powernow_dmi_table[] = {  	{  		.callback = acer_cpufreq_pst,  		.ident = "Acer Aspire", @@ -633,7 +633,7 @@ static struct dmi_system_id __initdata powernow_dmi_table[] = {  	{ }  }; -static int __init powernow_cpu_init(struct cpufreq_policy *policy) +static int __cpuinit powernow_cpu_init(struct cpufreq_policy *policy)  {  	union msr_fidvidstatus fidvidstatus;  	int result; diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 3e90cce3dc8b..491977baf6c0 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -9,7 +9,7 @@   *  Based on the powernow-k7.c module written by Dave Jones.   *  (C) 2003 Dave Jones on behalf of SuSE Labs   *  (C) 2004 Dominik Brodowski <linux@brodo.de> - *  (C) 2004 Pavel Machek <pavel@suse.cz> + *  (C) 2004 Pavel Machek <pavel@ucw.cz>   *  Licensed under the terms of the GNU GPL License version 2.   *  Based upon datasheets & sample CPUs kindly provided by AMD.   
* @@ -806,6 +806,8 @@ static int find_psb_table(struct powernow_k8_data *data)  	 * www.amd.com  	 */  	printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n"); +	printk(KERN_ERR PFX "Make sure that your BIOS is up to date" +		" and Cool'N'Quiet support is enabled in BIOS setup\n");  	return -ENODEV;  } @@ -910,8 +912,8 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data,  {  	int i;  	u32 hi = 0, lo = 0; -	rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo); -	data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT; +	rdmsr(MSR_PSTATE_CUR_LIMIT, lo, hi); +	data->max_hw_pstate = (lo & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT;  	for (i = 0; i < data->acpi_data.state_count; i++) {  		u32 index; diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index dd531cc56a8f..8095f8611f8a 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c @@ -34,6 +34,9 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] =  {  	&x86_hyper_vmware,  	&x86_hyper_ms_hyperv, +#ifdef CONFIG_XEN_PVHVM +	&x86_hyper_xen_hvm, +#endif  };  const struct hypervisor_x86 *x86_hyper; diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 33eae2062cf5..898c2f4eab88 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -347,8 +347,8 @@ static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node)  	return l3;  } -static void __cpuinit -amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) +static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, +					   int index)  {  	int node; @@ -396,20 +396,39 @@ amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)  	this_leaf->l3 = l3_caches[node];  } +/* + * check whether a slot used for disabling an L3 index is occupied. + * @l3: L3 cache descriptor + * @slot: slot number (0..1) + * + * @returns: the disabled index if used or negative value if slot free. 
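+ * + * An occupied slot is flagged by bits 31:30 of the disable register at offset 0x1BC + slot * 4; the disabled index is kept in the low 12 bits.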
+ */ +int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot) +{ +	unsigned int reg = 0; + +	pci_read_config_dword(l3->dev, 0x1BC + slot * 4, &reg); + +	/* check whether this slot is activated already */ +	if (reg & (3UL << 30)) +		return reg & 0xfff; + +	return -1; +} +  static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,  				  unsigned int slot)  { -	struct pci_dev *dev = this_leaf->l3->dev; -	unsigned int reg = 0; +	int index;  	if (!this_leaf->l3 || !this_leaf->l3->can_disable)  		return -EINVAL; -	if (!dev) -		return -EINVAL; +	index = amd_get_l3_disable_slot(this_leaf->l3, slot); +	if (index >= 0) +		return sprintf(buf, "%d\n", index); -	pci_read_config_dword(dev, 0x1BC + slot * 4, &reg); -	return sprintf(buf, "0x%08x\n", reg); +	return sprintf(buf, "FREE\n");  }  #define SHOW_CACHE_DISABLE(slot)					\ @@ -451,37 +470,74 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,  	}  } - -static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, -				   const char *buf, size_t count, -				   unsigned int slot) +/* + * disable a L3 cache index by using a disable-slot + * + * @l3:    L3 cache descriptor + * @cpu:   A CPU on the node containing the L3 cache + * @slot:  slot number (0..1) + * @index: index to disable + * + * @return: 0 on success, error status on failure + */ +int amd_set_l3_disable_slot(struct amd_l3_cache *l3, int cpu, unsigned slot, +			    unsigned long index)  { -	struct pci_dev *dev = this_leaf->l3->dev; -	int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); -	unsigned long val = 0; +	int ret = 0;  #define SUBCACHE_MASK	(3UL << 20)  #define SUBCACHE_INDEX	0xfff -	if (!this_leaf->l3 || !this_leaf->l3->can_disable) +	/* +	 * check whether this slot is already used or +	 * the index is already disabled +	 */ +	ret = amd_get_l3_disable_slot(l3, slot); +	if (ret >= 0)  		return -EINVAL; +	/* +	 * check whether the other slot has disabled the +	 * same index already +	 */ +	if (index == amd_get_l3_disable_slot(l3, !slot)) +		return -EINVAL; + +	/* do not allow writes outside of allowed bits */ +	if ((index & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) || +	    ((index & SUBCACHE_INDEX) > l3->indices)) +		return -EINVAL; + +	amd_l3_disable_index(l3, cpu, slot, index); + +	return 0; +} + +static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, +				  const char *buf, size_t count, +				  unsigned int slot) +{ +	unsigned long val = 0; +	int cpu, err = 0; +  	if (!capable(CAP_SYS_ADMIN))  		return -EPERM; -	if (!dev) +	if (!this_leaf->l3 || !this_leaf->l3->can_disable)  		return -EINVAL; -	if (strict_strtoul(buf, 10, &val) < 0) -		return -EINVAL; +	cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); -	/* do not allow writes outside of allowed bits */ -	if ((val & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) || -	    ((val & SUBCACHE_INDEX) > this_leaf->l3->indices) +	if (strict_strtoul(buf, 10, &val) < 0)  		return -EINVAL; -	amd_l3_disable_index(this_leaf->l3, cpu, slot, val); - +	err = amd_set_l3_disable_slot(this_leaf->l3, cpu, slot, val); +	if (err) { +		if (err == -EEXIST) +			printk(KERN_WARNING "L3 disable slot %d in use!\n", +					    slot); +		return err; +	}  	return count;  } @@ -502,7 +558,7 @@ static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,  #else	/* CONFIG_CPU_SUP_AMD */  static void __cpuinit -amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) +amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index)  {  };  #endif /* CONFIG_CPU_SUP_AMD */ @@ 
-518,7 +574,7 @@ __cpuinit cpuid4_cache_lookup_regs(int index,  	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {  		amd_cpuid4(index, &eax, &ebx, &ecx); -		amd_check_l3_disable(index, this_leaf); +		amd_check_l3_disable(this_leaf, index);  	} else {  		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);  	} diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c index 745b54f9be89..8209472b27a5 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-apei.c +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c @@ -80,7 +80,7 @@ int apei_write_mce(struct mce *m)  	rcd.hdr.revision = CPER_RECORD_REV;  	rcd.hdr.signature_end = CPER_SIG_END;  	rcd.hdr.section_count = 1; -	rcd.hdr.error_severity = CPER_SER_FATAL; +	rcd.hdr.error_severity = CPER_SEV_FATAL;  	/* timestamp, platform_id, partition_id are all invalid */  	rcd.hdr.validation_bits = 0;  	rcd.hdr.record_length = sizeof(rcd); @@ -96,7 +96,7 @@ int apei_write_mce(struct mce *m)  	rcd.sec_hdr.validation_bits = 0;  	rcd.sec_hdr.flags = CPER_SEC_PRIMARY;  	rcd.sec_hdr.section_type = CPER_SECTION_TYPE_MCE; -	rcd.sec_hdr.section_severity = CPER_SER_FATAL; +	rcd.sec_hdr.section_severity = CPER_SEV_FATAL;  	memcpy(&rcd.mce, m, sizeof(*m)); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 18cc42562250..ed41562909fe 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -51,7 +51,7 @@  static DEFINE_MUTEX(mce_read_mutex);  #define rcu_dereference_check_mce(p) \ -	rcu_dereference_check((p), \ +	rcu_dereference_index_check((p), \  			      rcu_read_lock_sched_held() || \  			      lockdep_is_held(&mce_read_mutex)) @@ -107,8 +107,8 @@ EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);  static int default_decode_mce(struct notifier_block *nb, unsigned long val,  			       void *data)  { -	pr_emerg("No human readable MCE decoding support on this CPU type.\n"); -	pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); +	pr_emerg(HW_ERR "No human readable MCE decoding support on this CPU type.\n"); +	pr_emerg(HW_ERR "Run the message through 'mcelog --ascii' to decode.\n");  	return NOTIFY_STOP;  } @@ -211,11 +211,11 @@ void mce_log(struct mce *mce)  static void print_mce(struct mce *m)  { -	pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", +	pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n",  	       m->extcpu, m->mcgstatus, m->bank, m->status);  	if (m->ip) { -		pr_emerg("RIP%s %02x:<%016Lx> ", +		pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ",  			!(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" 
: "",  				m->cs, m->ip); @@ -224,14 +224,14 @@ static void print_mce(struct mce *m)  		pr_cont("\n");  	} -	pr_emerg("TSC %llx ", m->tsc); +	pr_emerg(HW_ERR "TSC %llx ", m->tsc);  	if (m->addr)  		pr_cont("ADDR %llx ", m->addr);  	if (m->misc)  		pr_cont("MISC %llx ", m->misc);  	pr_cont("\n"); -	pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", +	pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",  		m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid);  	/* @@ -241,16 +241,6 @@ static void print_mce(struct mce *m)  	atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);  } -static void print_mce_head(void) -{ -	pr_emerg("\nHARDWARE ERROR\n"); -} - -static void print_mce_tail(void) -{ -	pr_emerg("This is not a software problem!\n"); -} -  #define PANIC_TIMEOUT 5 /* 5 seconds */  static atomic_t mce_paniced; @@ -291,7 +281,6 @@ static void mce_panic(char *msg, struct mce *final, char *exp)  		if (atomic_inc_return(&mce_fake_paniced) > 1)  			return;  	} -	print_mce_head();  	/* First print corrected ones that are still unlogged */  	for (i = 0; i < MCE_LOG_LEN; i++) {  		struct mce *m = &mcelog.entry[i]; @@ -322,16 +311,15 @@ static void mce_panic(char *msg, struct mce *final, char *exp)  			apei_err = apei_write_mce(final);  	}  	if (cpu_missing) -		printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); -	print_mce_tail(); +		pr_emerg(HW_ERR "Some CPUs didn't answer in synchronization\n");  	if (exp) -		printk(KERN_EMERG "Machine check: %s\n", exp); +		pr_emerg(HW_ERR "Machine check: %s\n", exp);  	if (!fake_panic) {  		if (panic_timeout == 0)  			panic_timeout = mce_panic_timeout;  		panic(msg);  	} else -		printk(KERN_EMERG "Fake kernel panic: %s\n", msg); +		pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg);  }  /* Support code for software error injection */ @@ -600,6 +588,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)  		 */  		if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) {  			mce_log(&m); +			atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, &m);  			add_taint(TAINT_MACHINE_CHECK);  		} @@ -1220,7 +1209,7 @@ int mce_notify_irq(void)  			schedule_work(&mce_trigger_work);  		if (__ratelimit(&ratelimit)) -			printk(KERN_INFO "Machine check events logged\n"); +			pr_info(HW_ERR "Machine check events logged\n");  		return 1;  	} diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 62b48e40920a..6fcd0936194f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -95,19 +95,20 @@ static void cmci_discover(int banks, int boot)  		rdmsrl(MSR_IA32_MCx_CTL2(i), val);  		/* Already owned by someone else? */ -		if (val & CMCI_EN) { +		if (val & MCI_CTL2_CMCI_EN) {  			if (test_and_clear_bit(i, owned) && !boot)  				print_update("SHD", &hdr, i);  			__clear_bit(i, __get_cpu_var(mce_poll_banks));  			continue;  		} -		val |= CMCI_EN | CMCI_THRESHOLD; +		val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; +		val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD;  		wrmsrl(MSR_IA32_MCx_CTL2(i), val);  		rdmsrl(MSR_IA32_MCx_CTL2(i), val);  		/* Did the enable bit stick? 
-- the bank supports CMCI */ -		if (val & CMCI_EN) { +		if (val & MCI_CTL2_CMCI_EN) {  			if (!test_and_set_bit(i, owned) && !boot)  				print_update("CMCI", &hdr, i);  			__clear_bit(i, __get_cpu_var(mce_poll_banks)); @@ -155,7 +156,7 @@ void cmci_clear(void)  			continue;  		/* Disable CMCI */  		rdmsrl(MSR_IA32_MCx_CTL2(i), val); -		val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); +		val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK);  		wrmsrl(MSR_IA32_MCx_CTL2(i), val);  		__clear_bit(i, __get_cpu_var(mce_banks_owned));  	} diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index e1a0a3bf9716..c2a8b26d4fea 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -34,15 +34,25 @@  /* How long to wait between reporting thermal events */  #define CHECK_INTERVAL		(300 * HZ) +#define THERMAL_THROTTLING_EVENT	0 +#define POWER_LIMIT_EVENT		1 +  /* - * Current thermal throttling state: + * Current thermal event state:   */ -struct thermal_state { -	bool			is_throttled; - +struct _thermal_state { +	bool			new_event; +	int			event;  	u64			next_check; -	unsigned long		throttle_count; -	unsigned long		last_throttle_count; +	unsigned long		count; +	unsigned long		last_count; +}; + +struct thermal_state { +	struct _thermal_state core_throttle; +	struct _thermal_state core_power_limit; +	struct _thermal_state package_throttle; +	struct _thermal_state package_power_limit;  };  static DEFINE_PER_CPU(struct thermal_state, thermal_state); @@ -53,11 +63,13 @@ static u32 lvtthmr_init __read_mostly;  #ifdef CONFIG_SYSFS  #define define_therm_throt_sysdev_one_ro(_name)				\ -	static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) +	static SYSDEV_ATTR(_name, 0444,					\ +			   therm_throt_sysdev_show_##_name,		\ +				   NULL)				\ -#define define_therm_throt_sysdev_show_func(name)			\ +#define define_therm_throt_sysdev_show_func(event, name)		\  									\ -static ssize_t therm_throt_sysdev_show_##name(				\ +static ssize_t therm_throt_sysdev_show_##event##_##name(		\  			struct sys_device *dev,				\  			struct sysdev_attribute *attr,			\  			char *buf)					\ @@ -66,30 +78,42 @@ static ssize_t therm_throt_sysdev_show_##name(				\  	ssize_t ret;							\  									\  	preempt_disable();	/* CPU hotplug */			\ -	if (cpu_online(cpu))						\ +	if (cpu_online(cpu)) {						\  		ret = sprintf(buf, "%lu\n",				\ -			      per_cpu(thermal_state, cpu).name);	\ -	else								\ +			      per_cpu(thermal_state, cpu).event.name);	\ +	} else								\  		ret = 0;						\  	preempt_enable();						\  									\  	return ret;							\  } -define_therm_throt_sysdev_show_func(throttle_count); -define_therm_throt_sysdev_one_ro(throttle_count); +define_therm_throt_sysdev_show_func(core_throttle, count); +define_therm_throt_sysdev_one_ro(core_throttle_count); + +define_therm_throt_sysdev_show_func(core_power_limit, count); +define_therm_throt_sysdev_one_ro(core_power_limit_count); + +define_therm_throt_sysdev_show_func(package_throttle, count); +define_therm_throt_sysdev_one_ro(package_throttle_count); + +define_therm_throt_sysdev_show_func(package_power_limit, count); +define_therm_throt_sysdev_one_ro(package_power_limit_count);  static struct attribute *thermal_throttle_attrs[] = { -	&attr_throttle_count.attr, +	&attr_core_throttle_count.attr,  	NULL  }; -static struct attribute_group thermal_throttle_attr_group = { +static struct attribute_group thermal_attr_group = {  	.attrs	= thermal_throttle_attrs,  	.name	= "thermal_throttle"  };  #endif /* CONFIG_SYSFS */ +#define CORE_LEVEL	0 +#define PACKAGE_LEVEL	1 +  /***   * therm_throt_process - Process thermal throttling event from interrupt   * @curr: Whether the condition is current or not (boolean), since the @@ -106,39 +130,70 @@ static struct attribute_group thermal_throttle_attr_group = {   *          1 : Event should be logged further, and a message has been   *              printed to the syslog.   */ -static int therm_throt_process(bool is_throttled) +static int therm_throt_process(bool new_event, int event, int level)  { -	struct thermal_state *state; -	unsigned int this_cpu; -	bool was_throttled; +	struct _thermal_state *state; +	unsigned int this_cpu = smp_processor_id(); +	bool old_event;  	u64 now; +	struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu); -	this_cpu = smp_processor_id();  	now = get_jiffies_64(); -	state = &per_cpu(thermal_state, this_cpu); +	if (level == CORE_LEVEL) { +		if (event == THERMAL_THROTTLING_EVENT) +			state = &pstate->core_throttle; +		else if (event == POWER_LIMIT_EVENT) +			state = &pstate->core_power_limit; +		else +			 return 0; +	} else if (level == PACKAGE_LEVEL) { +		if (event == THERMAL_THROTTLING_EVENT) +			state = &pstate->package_throttle; +		else if (event == POWER_LIMIT_EVENT) +			state = &pstate->package_power_limit; +		else +			return 0; +	} else +		return 0; -	was_throttled = state->is_throttled; -	state->is_throttled = is_throttled; +	old_event = state->new_event; +	state->new_event = new_event; -	if (is_throttled) -		state->throttle_count++; +	if (new_event) +		state->count++;  	if (time_before64(now, state->next_check) && -			state->throttle_count != state->last_throttle_count) +			state->count != state->last_count)  		return 0;  	state->next_check = now + CHECK_INTERVAL; -	state->last_throttle_count = state->throttle_count; +	state->last_count = state->count;  	/* if we just entered the thermal event */ -	if (is_throttled) { -		printk(KERN_CRIT "CPU%d: Temperature above threshold, cpu clock throttled (total events = %lu)\n", this_cpu, state->throttle_count); +	if (new_event) { +		if (event == THERMAL_THROTTLING_EVENT) +			printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n", +				this_cpu, +				level == CORE_LEVEL ? "Core" : "Package", +				state->count); +		else +			printk(KERN_CRIT "CPU%d: %s power limit notification (total events = %lu)\n", +				this_cpu, +				level == CORE_LEVEL ? "Core" : "Package", +				state->count);  		add_taint(TAINT_MACHINE_CHECK);  		return 1;  	} -	if (was_throttled) { -		printk(KERN_INFO "CPU%d: Temperature/speed normal\n", this_cpu); +	if (old_event) { +		if (event == THERMAL_THROTTLING_EVENT) +			printk(KERN_INFO "CPU%d: %s temperature/speed normal\n", +				this_cpu, +				level == CORE_LEVEL ? "Core" : "Package"); +		else +			printk(KERN_INFO "CPU%d: %s power limit normal\n", +				this_cpu, +				level == CORE_LEVEL ? "Core" : "Package");  		return 1;  	} @@ -149,13 +204,32 @@ static int therm_throt_process(bool is_throttled)  /* Add/Remove thermal_throttle interface for CPU device: */  static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev)  { -	return sysfs_create_group(&sys_dev->kobj, -				  &thermal_throttle_attr_group); +	int err; +	struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); + +	err = sysfs_create_group(&sys_dev->kobj, &thermal_attr_group); +	if (err) +		return err; + +	if (cpu_has(c, X86_FEATURE_PLN)) +		err = sysfs_add_file_to_group(&sys_dev->kobj, +					      &attr_core_power_limit_count.attr, +					      thermal_attr_group.name); +	if (cpu_has(c, X86_FEATURE_PTS)) +		err = sysfs_add_file_to_group(&sys_dev->kobj, +					      &attr_package_throttle_count.attr, +					      thermal_attr_group.name); +		if (cpu_has(c, X86_FEATURE_PLN)) +			err = sysfs_add_file_to_group(&sys_dev->kobj, +					&attr_package_power_limit_count.attr, +					thermal_attr_group.name); + +	return err;  }  static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev)  { -	sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); +	sysfs_remove_group(&sys_dev->kobj, &thermal_attr_group);  }  /* Mutex protecting device creation against CPU hotplug: */ @@ -226,14 +300,50 @@ device_initcall(thermal_throttle_init_device);  #endif /* CONFIG_SYSFS */ +/* + * Set up the two most significant bits to tell the mce log the thermal + * event type. + * This is a temporary solution; it may be changed in the future along + * with the mce log infrastructure. + */ +#define CORE_THROTTLED		(0) +#define CORE_POWER_LIMIT	((__u64)1 << 62) +#define PACKAGE_THROTTLED	((__u64)2 << 62) +#define PACKAGE_POWER_LIMIT	((__u64)3 << 62) +  /* Thermal transition interrupt handler */  static void intel_thermal_interrupt(void)  {  	__u64 msr_val; +	struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());  	rdmsrl(MSR_IA32_THERM_STATUS, msr_val); -	if (therm_throt_process((msr_val & THERM_STATUS_PROCHOT) != 0)) -		mce_log_therm_throt_event(msr_val); + +	if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, +				THERMAL_THROTTLING_EVENT, +				CORE_LEVEL) != 0) +		mce_log_therm_throt_event(CORE_THROTTLED | msr_val); + +	if (cpu_has(c, X86_FEATURE_PLN)) +		if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, +					POWER_LIMIT_EVENT, +					CORE_LEVEL) != 0) +			mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val); + +	if (cpu_has(c, X86_FEATURE_PTS)) { +		rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); +		if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, +					THERMAL_THROTTLING_EVENT, +					PACKAGE_LEVEL) != 0) +			mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val); +		if (cpu_has(c, X86_FEATURE_PLN)) +			if (therm_throt_process(msr_val & +					PACKAGE_THERM_STATUS_POWER_LIMIT, +					POWER_LIMIT_EVENT, +					PACKAGE_LEVEL) != 0) +				mce_log_therm_throt_event(PACKAGE_POWER_LIMIT +							  | msr_val); +	}  }  static void unexpected_thermal_interrupt(void) @@ -335,8 +445,26 @@ void intel_init_thermal(struct cpuinfo_x86 *c)  	apic_write(APIC_LVTTHMR, h);  	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); -	wrmsr(MSR_IA32_THERM_INTERRUPT, -		l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); +	if (cpu_has(c, X86_FEATURE_PLN)) +		wrmsr(MSR_IA32_THERM_INTERRUPT, +		      l | (THERM_INT_LOW_ENABLE +			| THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h); +	else +		wrmsr(MSR_IA32_THERM_INTERRUPT, +		      l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); + +	if (cpu_has(c, X86_FEATURE_PTS)) {  		
rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); +		if (cpu_has(c, X86_FEATURE_PLN)) +			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, +			      l | (PACKAGE_THERM_INT_LOW_ENABLE +				| PACKAGE_THERM_INT_HIGH_ENABLE +				| PACKAGE_THERM_INT_PLN_ENABLE), h); +		else +			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, +			      l | (PACKAGE_THERM_INT_LOW_ENABLE +				| PACKAGE_THERM_INT_HIGH_ENABLE), h); +	}  	smp_thermal_vector = intel_thermal_interrupt; diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 16f41bbe46b6..d944bf6c50e9 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -18,6 +18,7 @@  #include <asm/mshyperv.h>  struct ms_hyperv_info ms_hyperv; +EXPORT_SYMBOL_GPL(ms_hyperv);  static bool __init ms_hyperv_platform(void)  { diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 06130b52f012..c5f59d071425 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -632,9 +632,9 @@ static void __init mtrr_print_out_one_result(int i)  	unsigned long gran_base, chunk_base, lose_base;  	char gran_factor, chunk_factor, lose_factor; -	gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), -	chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), -	lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), +	gran_base = to_size_factor(result[i].gran_sizek, &gran_factor); +	chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor); +	lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor);  	pr_info("%sgran_size: %ld%c \tchunk_size: %ld%c \t",  		result[i].bad ? "*BAD*" : " ", diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index fd31a441c61c..7d28d7d03885 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -433,13 +433,12 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,  {  	unsigned int mask_lo, mask_hi, base_lo, base_hi;  	unsigned int tmp, hi; -	int cpu;  	/*  	 * get_mtrr doesn't need to update mtrr_state, also it could be called  	 * from any cpu, so try to print it out directly.  	 */ -	cpu = get_cpu(); +	get_cpu();  	rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 79556bd9b602..01c0f3ee6cc3 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -35,6 +35,7 @@  #include <linux/types.h> /* FIXME: kvm_para.h needs this */ +#include <linux/stop_machine.h>  #include <linux/kvm_para.h>  #include <linux/uaccess.h>  #include <linux/module.h> @@ -143,22 +144,28 @@ struct set_mtrr_data {  	mtrr_type	smp_type;  }; +static DEFINE_PER_CPU(struct cpu_stop_work, mtrr_work); +  /** - * ipi_handler - Synchronisation handler. Executed by "other" CPUs. + * mtrr_work_handler - Synchronisation handler. Executed by "other" CPUs.   * @info: pointer to mtrr configuration data   *   * Returns nothing.   
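 *
 * (Editor's aside -- an illustrative sketch, not part of the patch: this
 * handler is queued from set_mtrr() below through the stop-machine
 * infrastructure, e.g.
 *
 *	stop_one_cpu_nowait(cpu, mtrr_work_handler, &data,
 *			    &per_cpu(mtrr_work, cpu));
 *
 * so it runs in stop-machine context rather than from an IPI as the old
 * ipi_handler() did.)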
*/ -static void ipi_handler(void *info) +static int mtrr_work_handler(void *info)  {  #ifdef CONFIG_SMP  	struct set_mtrr_data *data = info;  	unsigned long flags; +	atomic_dec(&data->count); +	while (!atomic_read(&data->gate)) +		cpu_relax(); +  	local_irq_save(flags);  	atomic_dec(&data->count); -	while (!atomic_read(&data->gate)) +	while (atomic_read(&data->gate))  		cpu_relax();  	/*  The master has cleared me to execute  */ @@ -173,12 +180,13 @@ static void ipi_handler(void *info)  	}  	atomic_dec(&data->count); -	while (atomic_read(&data->gate)) +	while (!atomic_read(&data->gate))  		cpu_relax();  	atomic_dec(&data->count);  	local_irq_restore(flags);  #endif +	return 0;  }  static inline int types_compatible(mtrr_type type1, mtrr_type type2) @@ -198,7 +206,7 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2)   *   * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:   * - * 1. Send IPI to do the following: + * 1. Queue work to do the following on all processors:   * 2. Disable Interrupts   * 3. Wait for all procs to do so   * 4. Enter no-fill cache mode @@ -215,14 +223,17 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2)   * 15. Enable interrupts.   *   * What does that mean for us? Well, first we set data.count to the number - * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait - * until it hits 0 and proceed. We set the data.gate flag and reset data.count. - * Meanwhile, they are waiting for that flag to be set. Once it's set, each + * of CPUs. As each CPU announces that it started the rendezvous handler by + * decrementing the count, we reset data.count and set the data.gate flag, + * allowing all the CPUs to proceed with the work. As each CPU disables + * interrupts, it'll decrement data.count once. We wait until it hits 0 and + * proceed. We clear the data.gate flag and reset data.count. Meanwhile, they + * are waiting for that flag to be cleared. Once it's cleared, each   * CPU goes through the transition of updating MTRRs.   * The CPU vendors may each do it differently,   * so we call mtrr_if->set() callback and let them take care of it.   * When they're done, they again decrement data->count and wait for data.gate - * to be reset. + * to be set.   * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag   * Everyone then enables interrupts and we all continue on.   
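 *
 * (Editor's illustration, not part of the patch -- the handshake above,
 * condensed; "master" is the CPU in set_mtrr(), "worker" is each CPU
 * running mtrr_work_handler(), and N is num_booting_cpus():
 *
 *	master				worker
 *	------				------
 *	count = N-1; gate = 0		dec count
 *	wait count == 0			wait gate == 1
 *	count = N-1; gate = 1		irq off; dec count
 *	irq off; wait count == 0	wait gate == 0
 *	count = N-1; gate = 0		mtrr_if->set(); dec count
 *	set own MTRRs			wait gate == 1
 *	count = N-1; gate = 1		dec count; irq on
 *	wait count == 0; irq on
 *
 * The sense of the gate alternates so a fast worker cannot run two
 * phases on one stale gate value.)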
* @@ -234,6 +245,9 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ  {  	struct set_mtrr_data data;  	unsigned long flags; +	int cpu; + +	preempt_disable();  	data.smp_reg = reg;  	data.smp_base = base; @@ -246,10 +260,15 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ  	atomic_set(&data.gate, 0);  	/* Start the ball rolling on other CPUs */ -	if (smp_call_function(ipi_handler, &data, 0) != 0) -		panic("mtrr: timed out waiting for other CPUs\n"); +	for_each_online_cpu(cpu) { +		struct cpu_stop_work *work = &per_cpu(mtrr_work, cpu); + +		if (cpu == smp_processor_id()) +			continue; + +		stop_one_cpu_nowait(cpu, mtrr_work_handler, &data, work); +	} -	local_irq_save(flags);  	while (atomic_read(&data.count))  		cpu_relax(); @@ -259,6 +278,16 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ  	smp_wmb();  	atomic_set(&data.gate, 1); +	local_irq_save(flags); + +	while (atomic_read(&data.count)) +		cpu_relax(); + +	/* Ok, reset count and toggle gate */ +	atomic_set(&data.count, num_booting_cpus() - 1); +	smp_wmb(); +	atomic_set(&data.gate, 0); +  	/* Do our MTRR business */  	/* @@ -279,7 +308,7 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ  	atomic_set(&data.count, num_booting_cpus() - 1);  	smp_wmb(); -	atomic_set(&data.gate, 0); +	atomic_set(&data.gate, 1);  	/*  	 * Wait here for everyone to have seen the gate change @@ -289,6 +318,7 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ  		cpu_relax();  	local_irq_restore(flags); +	preempt_enable();  }  /** diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5db5b7d65a18..f2da20fda02d 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -220,6 +220,7 @@ struct x86_pmu {  						 struct perf_event *event);  	struct event_constraint *event_constraints;  	void		(*quirks)(void); +	int		perfctr_second_write;  	int		(*cpu_prepare)(int cpu);  	void		(*cpu_starting)(int cpu); @@ -295,10 +296,10 @@ x86_perf_event_update(struct perf_event *event)  	 * count to the generic event atomically:  	 */  again: -	prev_raw_count = atomic64_read(&hwc->prev_count); +	prev_raw_count = local64_read(&hwc->prev_count);  	rdmsrl(hwc->event_base + idx, new_raw_count); -	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, +	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,  					new_raw_count) != prev_raw_count)  		goto again; @@ -313,8 +314,8 @@ again:  	delta = (new_raw_count << shift) - (prev_raw_count << shift);  	delta >>= shift; -	atomic64_add(delta, &event->count); -	atomic64_sub(delta, &hwc->period_left); +	local64_add(delta, &event->count); +	local64_sub(delta, &hwc->period_left);  	return new_raw_count;  } @@ -438,7 +439,7 @@ static int x86_setup_perfctr(struct perf_event *event)  	if (!hwc->sample_period) {  		hwc->sample_period = x86_pmu.max_period;  		hwc->last_period = hwc->sample_period; -		atomic64_set(&hwc->period_left, hwc->sample_period); +		local64_set(&hwc->period_left, hwc->sample_period);  	} else {  		/*  		 * If we have a PMU initialized but no APIC @@ -885,7 +886,7 @@ static int  x86_perf_event_set_period(struct perf_event *event)  {  	struct hw_perf_event *hwc = &event->hw; -	s64 left = atomic64_read(&hwc->period_left); +	s64 left = local64_read(&hwc->period_left);  	s64 period = hwc->sample_period;  	int ret = 0, idx = hwc->idx; @@ -897,14 +898,14 @@ x86_perf_event_set_period(struct perf_event 
*event)  	 */  	if (unlikely(left <= -period)) {  		left = period; -		atomic64_set(&hwc->period_left, left); +		local64_set(&hwc->period_left, left);  		hwc->last_period = period;  		ret = 1;  	}  	if (unlikely(left <= 0)) {  		left += period; -		atomic64_set(&hwc->period_left, left); +		local64_set(&hwc->period_left, left);  		hwc->last_period = period;  		ret = 1;  	} @@ -923,10 +924,19 @@ x86_perf_event_set_period(struct perf_event *event)  	 * The hw event starts counting from this event offset,  	 * mark it to be able to extract future deltas:  	 */ -	atomic64_set(&hwc->prev_count, (u64)-left); +	local64_set(&hwc->prev_count, (u64)-left); -	wrmsrl(hwc->event_base + idx, +	wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask); + +	/* +	 * Due to an erratum on certain CPUs we need +	 * a second write to be sure the register +	 * is updated properly +	 */ +	if (x86_pmu.perfctr_second_write) { +		wrmsrl(hwc->event_base + idx,  			(u64)(-left) & x86_pmu.cntval_mask); +	}  	perf_event_update_userpage(event); @@ -969,7 +979,7 @@ static int x86_pmu_enable(struct perf_event *event)  	 * skip the schedulability test here, it will be performed  	 * at commit time(->commit_txn) as a whole  	 */ -	if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) +	if (cpuc->group_flag & PERF_EVENT_TXN)  		goto out;  	ret = x86_pmu.schedule_events(cpuc, n, assign); @@ -1096,7 +1106,7 @@ static void x86_pmu_disable(struct perf_event *event)  	 * The events never got scheduled and ->cancel_txn will truncate  	 * the event_list.  	 */ -	if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) +	if (cpuc->group_flag & PERF_EVENT_TXN)  		return;  	x86_pmu_stop(event); @@ -1388,7 +1398,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu)  {  	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); -	cpuc->group_flag |= PERF_EVENT_TXN_STARTED; +	cpuc->group_flag |= PERF_EVENT_TXN;  	cpuc->n_txn = 0;  } @@ -1401,7 +1411,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)  {  	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); -	cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED; +	cpuc->group_flag &= ~PERF_EVENT_TXN;  	/*  	 * Truncate the collected events.  	 */ @@ -1435,11 +1445,7 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)  	 */  	memcpy(cpuc->assign, assign, n*sizeof(int)); -	/* -	 * Clear out the txn count so that ->cancel_txn() which gets -	 * run after ->commit_txn() doesn't undo things. -	 */ -	cpuc->n_txn = 0; +	cpuc->group_flag &= ~PERF_EVENT_TXN;  	return 0;  } @@ -1607,8 +1613,6 @@ static const struct stacktrace_ops backtrace_ops = {  	.walk_stack		= print_context_stack_bp,  }; -#include "../dumpstack.h" -  static void  perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)  { @@ -1730,22 +1734,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)  	return entry;  } -void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip) -{ -	regs->ip = ip; -	/* -	 * perf_arch_fetch_caller_regs adds another call, we need to increment -	 * the skip level -	 */ -	regs->bp = rewind_frame_pointer(skip + 1); -	regs->cs = __KERNEL_CS; -	/* -	 * We abuse bit 3 to pass exact information, see perf_misc_flags -	 * and the comment with PERF_EFLAGS_EXACT. 
-	 */ -	regs->flags = 0; -} -  unsigned long perf_instruction_pointer(struct pt_regs *regs)  {  	unsigned long ip; diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index ae85d69644d1..febb12cea795 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -21,22 +21,36 @@ struct p4_event_bind {  	char cntr[2][P4_CNTR_LIMIT];		/* counter index (offset), -1 on absence */  }; -struct p4_cache_event_bind { +struct p4_pebs_bind {  	unsigned int metric_pebs;  	unsigned int metric_vert;  }; -#define P4_GEN_CACHE_EVENT_BIND(name)		\ -	[P4_CACHE__##name] = {			\ -		.metric_pebs = P4_PEBS__##name,	\ -		.metric_vert = P4_VERT__##name,	\ +/* it sets P4_PEBS_ENABLE_UOP_TAG as well */ +#define P4_GEN_PEBS_BIND(name, pebs, vert)			\ +	[P4_PEBS_METRIC__##name] = {				\ +		.metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG,	\ +		.metric_vert = vert,				\  	} -static struct p4_cache_event_bind p4_cache_event_bind_map[] = { -	P4_GEN_CACHE_EVENT_BIND(1stl_cache_load_miss_retired), -	P4_GEN_CACHE_EVENT_BIND(2ndl_cache_load_miss_retired), -	P4_GEN_CACHE_EVENT_BIND(dtlb_load_miss_retired), -	P4_GEN_CACHE_EVENT_BIND(dtlb_store_miss_retired), +/* + * note we have P4_PEBS_ENABLE_UOP_TAG always set here + * + * it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of + * event configuration to find out which values are to be + * written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT + * registers + */ +static struct p4_pebs_bind p4_pebs_bind_map[] = { +	P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired,	0x0000001, 0x0000001), +	P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired,	0x0000002, 0x0000001), +	P4_GEN_PEBS_BIND(dtlb_load_miss_retired,	0x0000004, 0x0000001), +	P4_GEN_PEBS_BIND(dtlb_store_miss_retired,	0x0000004, 0x0000002), +	P4_GEN_PEBS_BIND(dtlb_all_miss_retired,		0x0000004, 0x0000003), +	P4_GEN_PEBS_BIND(tagged_mispred_branch,		0x0018000, 0x0000010), +	P4_GEN_PEBS_BIND(mob_load_replay_retired,	0x0000200, 0x0000001), +	P4_GEN_PEBS_BIND(split_load_retired,		0x0000400, 0x0000001), +	P4_GEN_PEBS_BIND(split_store_retired,		0x0000400, 0x0000002),  };  /* @@ -281,10 +295,10 @@ static struct p4_event_bind p4_event_bind_map[] = {  	},  }; -#define P4_GEN_CACHE_EVENT(event, bit, cache_event)			  \ +#define P4_GEN_CACHE_EVENT(event, bit, metric)				  \  	p4_config_pack_escr(P4_ESCR_EVENT(event)			| \  			    P4_ESCR_EMASK_BIT(event, bit))		| \ -	p4_config_pack_cccr(cache_event					| \ +	p4_config_pack_cccr(metric					| \  			    P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event))))  static __initconst const u64 p4_hw_cache_event_ids @@ -296,34 +310,34 @@ static __initconst const u64 p4_hw_cache_event_ids  	[ C(OP_READ) ] = {  		[ C(RESULT_ACCESS) ] = 0x0,  		[ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, -						P4_CACHE__1stl_cache_load_miss_retired), +						P4_PEBS_METRIC__1stl_cache_load_miss_retired),  	},   },   [ C(LL  ) ] = {  	[ C(OP_READ) ] = {  		[ C(RESULT_ACCESS) ] = 0x0,  		[ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, -						P4_CACHE__2ndl_cache_load_miss_retired), +						P4_PEBS_METRIC__2ndl_cache_load_miss_retired),  	},  },   [ C(DTLB) ] = {  	[ C(OP_READ) ] = {  		[ C(RESULT_ACCESS) ] = 0x0,  		[ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, -						P4_CACHE__dtlb_load_miss_retired), +						P4_PEBS_METRIC__dtlb_load_miss_retired),  	},  	[ C(OP_WRITE) ] = {  		[ C(RESULT_ACCESS) ] = 0x0,  		[ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, 
-						P4_CACHE__dtlb_store_miss_retired), +						P4_PEBS_METRIC__dtlb_store_miss_retired),  	},   },   [ C(ITLB) ] = {  	[ C(OP_READ) ] = {  		[ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT, -						P4_CACHE__itlb_reference_hit), +						P4_PEBS_METRIC__none),  		[ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS, -						P4_CACHE__itlb_reference_miss), +						P4_PEBS_METRIC__none),  	},  	[ C(OP_WRITE) ] = {  		[ C(RESULT_ACCESS) ] = -1, @@ -414,11 +428,37 @@ static u64 p4_pmu_event_map(int hw_event)  	return config;  } +static int p4_validate_raw_event(struct perf_event *event) +{ +	unsigned int v; + +	/* user data may have out-of-bound event index */ +	v = p4_config_unpack_event(event->attr.config); +	if (v >= ARRAY_SIZE(p4_event_bind_map)) { +		pr_warning("P4 PMU: Unknown event code: %d\n", v); +		return -EINVAL; +	} + +	/* +	 * it may have some screwed PEBS bits +	 */ +	if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) { +		pr_warning("P4 PMU: PEBS are not supported yet\n"); +		return -EINVAL; +	} +	v = p4_config_unpack_metric(event->attr.config); +	if (v >= ARRAY_SIZE(p4_pebs_bind_map)) { +		pr_warning("P4 PMU: Unknown metric code: %d\n", v); +		return -EINVAL; +	} + +	return 0; +} +  static int p4_hw_config(struct perf_event *event)  {  	int cpu = get_cpu();  	int rc = 0; -	unsigned int evnt;  	u32 escr, cccr;  	/* @@ -438,12 +478,9 @@ static int p4_hw_config(struct perf_event *event)  	if (event->attr.type == PERF_TYPE_RAW) { -		/* user data may have out-of-bound event index */ -		evnt = p4_config_unpack_event(event->attr.config); -		if (evnt >= ARRAY_SIZE(p4_event_bind_map)) { -			rc = -EINVAL; +		rc = p4_validate_raw_event(event); +		if (rc)  			goto out; -		}  		/*  		 * We don't control raw events so it's up to the caller  		 * on HT machine but allow HT-compatible specifics to be  		 * passed on)  		 * +		 * Note that for RAW events we allow user to use P4_CCCR_RESERVED +		 * bits since we keep additional info here (for cache events, etc.) +		 *  		 * XXX: HT wide things should check perf_paranoid_cpu() &&  		 *      CAP_SYS_ADMIN  		 */  		event->hw.config |= event->attr.config &  			(p4_config_pack_escr(P4_ESCR_MASK_HT) | -			 p4_config_pack_cccr(P4_CCCR_MASK_HT)); +			 p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED));  	}  	rc = x86_setup_perfctr(event); @@ -482,6 +522,29 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)  	return overflow;  } +static void p4_pmu_disable_pebs(void) +{ +	/* +	 * FIXME +	 * +	 * It's still allowed that two threads set up the same cache +	 * events, so we can't simply clear the metrics until we know +	 * no one is depending on us; we need some kind of counter +	 * for "ReplayEvent" users. +	 * +	 * What is more complex -- RAW events: if a user (for some +	 * reason) passes a cache event metric with an improper +	 * event opcode, it's fine from the hardware point of view +	 * but complete nonsense as far as the "meaning" of such an +	 * action goes. +	 * +	 * So for the moment, leave the metrics turned on forever -- it's +	 * OK for now but needs to be revisited! 
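	 *
	 * (Editor's sketch, not part of the patch: one possible shape for
	 * the missing counter would be a per-metric refcount, e.g.
	 *
	 *	static atomic_t pebs_users[P4_PEBS_METRIC__max];
	 *
	 * taken in p4_pmu_enable_pebs() and dropped here, clearing the
	 * MSRs below only once the refcount of the active metric reaches
	 * zero; the name pebs_users is invented for illustration only.)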
+	 * +	 * (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0); +	 * (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0); +	 */ +} +  static inline void p4_pmu_disable_event(struct perf_event *event)  {  	struct hw_perf_event *hwc = &event->hw; @@ -507,6 +570,26 @@ static void p4_pmu_disable_all(void)  			continue;  		p4_pmu_disable_event(event);  	} + +	p4_pmu_disable_pebs(); +} + +/* configuration must be valid */ +static void p4_pmu_enable_pebs(u64 config) +{ +	struct p4_pebs_bind *bind; +	unsigned int idx; + +	BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK); + +	idx = p4_config_unpack_metric(config); +	if (idx == P4_PEBS_METRIC__none) +		return; + +	bind = &p4_pebs_bind_map[idx]; + +	(void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE,	(u64)bind->metric_pebs); +	(void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT,	(u64)bind->metric_vert);  }  static void p4_pmu_enable_event(struct perf_event *event) @@ -515,9 +598,7 @@ static void p4_pmu_enable_event(struct perf_event *event)  	int thread = p4_ht_config_thread(hwc->config);  	u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));  	unsigned int idx = p4_config_unpack_event(hwc->config); -	unsigned int idx_cache = p4_config_unpack_cache_event(hwc->config);  	struct p4_event_bind *bind; -	struct p4_cache_event_bind *bind_cache;  	u64 escr_addr, cccr;  	bind = &p4_event_bind_map[idx]; @@ -537,16 +618,10 @@ static void p4_pmu_enable_event(struct perf_event *event)  	cccr = p4_config_unpack_cccr(hwc->config);  	/* -	 * it could be Cache event so that we need to -	 * set metrics into additional MSRs +	 * it could be Cache event so we need to write metrics +	 * into additional MSRs  	 */ -	BUILD_BUG_ON(P4_CACHE__MAX > P4_CCCR_CACHE_OPS_MASK); -	if (idx_cache > P4_CACHE__NONE && -		idx_cache < ARRAY_SIZE(p4_cache_event_bind_map)) { -		bind_cache = &p4_cache_event_bind_map[idx_cache]; -		(void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind_cache->metric_pebs); -		(void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind_cache->metric_vert); -	} +	p4_pmu_enable_pebs(hwc->config);  	(void)checking_wrmsrl(escr_addr, escr_conf);  	(void)checking_wrmsrl(hwc->config_base + hwc->idx, @@ -581,6 +656,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)  	cpuc = &__get_cpu_var(cpu_hw_events);  	for (idx = 0; idx < x86_pmu.num_counters; idx++) { +		int overflow;  		if (!test_bit(idx, cpuc->active_mask))  			continue; @@ -591,12 +667,14 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)  		WARN_ON_ONCE(hwc->idx != idx);  		/* it might be unflagged overflow */ -		handled = p4_pmu_clear_cccr_ovf(hwc); +		overflow = p4_pmu_clear_cccr_ovf(hwc);  		val = x86_perf_event_update(event); -		if (!handled && (val & (1ULL << (x86_pmu.cntval_bits - 1)))) +		if (!overflow && (val & (1ULL << (x86_pmu.cntval_bits - 1))))  			continue; +		handled += overflow; +  		/* event overflow for sure */  		data.period = event->hw.last_period; @@ -612,7 +690,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)  		inc_irq_stat(apic_perf_irqs);  	} -	return handled; +	return handled > 0;  }  /* @@ -829,6 +907,15 @@ static __initconst const struct x86_pmu p4_pmu = {  	.max_period		= (1ULL << 39) - 1,  	.hw_config		= p4_hw_config,  	.schedule_events	= p4_pmu_schedule_events, +	/* +	 * This handles erratum N15 in intel doc 249199-029, +	 * the counter may not be updated correctly on write +	 * so we need a second write operation to do the trick +	 * (the official workaround didn't work) +	 * +	 * the former idea is taken from OProfile code +	 */ +	
.perfctr_second_write	= 1,  };  static __init int p4_pmu_init(void) diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c new file mode 100644 index 000000000000..34b4dad6f0b8 --- /dev/null +++ b/arch/x86/kernel/cpu/scattered.c @@ -0,0 +1,63 @@ +/* + *	Routines to identify additional cpu features that are scattered in + *	cpuid space. + */ +#include <linux/cpu.h> + +#include <asm/pat.h> +#include <asm/processor.h> + +#include <asm/apic.h> + +struct cpuid_bit { +	u16 feature; +	u8 reg; +	u8 bit; +	u32 level; +	u32 sub_leaf; +}; + +enum cpuid_regs { +	CR_EAX = 0, +	CR_ECX, +	CR_EDX, +	CR_EBX +}; + +void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) +{ +	u32 max_level; +	u32 regs[4]; +	const struct cpuid_bit *cb; + +	static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { +		{ X86_FEATURE_IDA,		CR_EAX, 1, 0x00000006, 0 }, +		{ X86_FEATURE_ARAT,		CR_EAX, 2, 0x00000006, 0 }, +		{ X86_FEATURE_PLN,		CR_EAX, 4, 0x00000006, 0 }, +		{ X86_FEATURE_PTS,		CR_EAX, 6, 0x00000006, 0 }, +		{ X86_FEATURE_APERFMPERF,	CR_ECX, 0, 0x00000006, 0 }, +		{ X86_FEATURE_EPB,		CR_ECX, 3, 0x00000006, 0 }, +		{ X86_FEATURE_XSAVEOPT,		CR_EAX,	0, 0x0000000d, 1 }, +		{ X86_FEATURE_CPB,		CR_EDX, 9, 0x80000007, 0 }, +		{ X86_FEATURE_NPT,		CR_EDX, 0, 0x8000000a, 0 }, +		{ X86_FEATURE_LBRV,		CR_EDX, 1, 0x8000000a, 0 }, +		{ X86_FEATURE_SVML,		CR_EDX, 2, 0x8000000a, 0 }, +		{ X86_FEATURE_NRIPS,		CR_EDX, 3, 0x8000000a, 0 }, +		{ 0, 0, 0, 0, 0 } +	}; + +	for (cb = cpuid_bits; cb->feature; cb++) { + +		/* Verify that the level is valid */ +		max_level = cpuid_eax(cb->level & 0xffff0000); +		if (max_level < cb->level || +		    max_level > (cb->level | 0xffff)) +			continue; + +		cpuid_count(cb->level, cb->sub_leaf, &regs[CR_EAX], +			    &regs[CR_EBX], &regs[CR_ECX], &regs[CR_EDX]); + +		if (regs[cb->reg] & (1 << cb->bit)) +			set_cpu_cap(c, cb->feature); +	} +} diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/topology.c index 10fa5684a662..4397e987a1cf 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/topology.c @@ -1,62 +1,14 @@  /* - *	Routines to indentify additional cpu features that are scattered in - *	cpuid space. + * Check for extended topology enumeration cpuid leaf 0xb and if it + * exists, use it for populating initial_apicid and cpu topology + * detection.   
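 *
 * (Editor's note, not part of the patch: leaf 0xb is queried one level
 * at a time with ECX selecting the sub-leaf, roughly
 *
 *	cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
 *	shift = eax & 0x1f;	-- bits 4:0: APIC ID shift for this level
 *
 * where SMT_LEVEL is the 0-based level number defined just below.)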
*/ -#include <linux/cpu.h> +#include <linux/cpu.h> +#include <asm/apic.h>  #include <asm/pat.h>  #include <asm/processor.h> -#include <asm/apic.h> - -struct cpuid_bit { -	u16 feature; -	u8 reg; -	u8 bit; -	u32 level; -}; - -enum cpuid_regs { -	CR_EAX = 0, -	CR_ECX, -	CR_EDX, -	CR_EBX -}; - -void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) -{ -	u32 max_level; -	u32 regs[4]; -	const struct cpuid_bit *cb; - -	static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { -		{ X86_FEATURE_IDA,   		CR_EAX, 1, 0x00000006 }, -		{ X86_FEATURE_ARAT,  		CR_EAX, 2, 0x00000006 }, -		{ X86_FEATURE_APERFMPERF,	CR_ECX, 0, 0x00000006 }, -		{ X86_FEATURE_CPB,   		CR_EDX, 9, 0x80000007 }, -		{ X86_FEATURE_NPT,   		CR_EDX, 0, 0x8000000a }, -		{ X86_FEATURE_LBRV,  		CR_EDX, 1, 0x8000000a }, -		{ X86_FEATURE_SVML,  		CR_EDX, 2, 0x8000000a }, -		{ X86_FEATURE_NRIPS, 		CR_EDX, 3, 0x8000000a }, -		{ 0, 0, 0, 0 } -	}; - -	for (cb = cpuid_bits; cb->feature; cb++) { - -		/* Verify that the level is valid */ -		max_level = cpuid_eax(cb->level & 0xffff0000); -		if (max_level < cb->level || -		    max_level > (cb->level | 0xffff)) -			continue; - -		cpuid(cb->level, &regs[CR_EAX], &regs[CR_EBX], -			&regs[CR_ECX], &regs[CR_EDX]); - -		if (regs[cb->reg] & (1 << cb->bit)) -			set_cpu_cap(c, cb->feature); -	} -} -  /* leaf 0xb SMT level */  #define SMT_LEVEL	0 diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index b9d1ff588445..227b0448960d 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -51,7 +51,7 @@ static inline int __vmware_platform(void)  static unsigned long vmware_get_tsc_khz(void)  { -	uint64_t tsc_hz; +	uint64_t tsc_hz, lpj;  	uint32_t eax, ebx, ecx, edx;  	VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); @@ -62,6 +62,13 @@ static unsigned long vmware_get_tsc_khz(void)  	printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n",  			 (unsigned long) tsc_hz / 1000,  			 (unsigned long) tsc_hz % 1000); + +	if (!preset_lpj) { +		lpj = ((u64)tsc_hz * 1000); +		do_div(lpj, HZ); +		preset_lpj = lpj; +	} +  	return tsc_hz;  } diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index ebd4c51d096a..764c7c2b1811 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -28,6 +28,8 @@  #include <asm/reboot.h>  #include <asm/virtext.h> +int in_crash_kexec; +  #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)  static void kdump_nmi_callback(int cpu, struct die_args *args) @@ -61,6 +63,7 @@ static void kdump_nmi_callback(int cpu, struct die_args *args)  static void kdump_nmi_shootdown_cpus(void)  { +	in_crash_kexec = 1;  	nmi_shootdown_cpus(kdump_nmi_callback);  	disable_local_APIC(); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index c89a386930b7..6e8752c1bd52 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -18,7 +18,6 @@  #include <asm/stacktrace.h> -#include "dumpstack.h"  int panic_on_unrecovered_nmi;  int panic_on_io_nmi; diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h deleted file mode 100644 index e1a93be4fd44..000000000000 --- a/arch/x86/kernel/dumpstack.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - *  Copyright (C) 1991, 1992  Linus Torvalds - *  Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs - */ - -#ifndef DUMPSTACK_H -#define DUMPSTACK_H - -#ifdef CONFIG_X86_32 -#define STACKSLOTS_PER_LINE 8 -#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) -#else -#define STACKSLOTS_PER_LINE 4 -#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) -#endif 
- -#include <linux/uaccess.h> - -extern void -show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, -		unsigned long *stack, unsigned long bp, char *log_lvl); - -extern void -show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, -		unsigned long *sp, unsigned long bp, char *log_lvl); - -extern unsigned int code_bytes; - -/* The form of the top of the frame on the stack */ -struct stack_frame { -	struct stack_frame *next_frame; -	unsigned long return_address; -}; - -struct stack_frame_ia32 { -    u32 next_frame; -    u32 return_address; -}; - -static inline unsigned long rewind_frame_pointer(int n) -{ -	struct stack_frame *frame; - -	get_bp(frame); - -#ifdef CONFIG_FRAME_POINTER -	while (n--) { -		if (probe_kernel_address(&frame->next_frame, frame)) -			break; -	} -#endif - -	return (unsigned long)frame; -} - -#endif /* DUMPSTACK_H */ diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 11540a189d93..0f6376ffa2d9 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -16,8 +16,6 @@  #include <asm/stacktrace.h> -#include "dumpstack.h" -  void dump_trace(struct task_struct *task, struct pt_regs *regs,  		unsigned long *stack, unsigned long bp, diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 272c9f1f05f3..57a21f11c791 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -16,7 +16,6 @@  #include <asm/stacktrace.h> -#include "dumpstack.h"  #define N_EXCEPTION_STACKS_END \  		(N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2) diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index cd49141cf153..227d00920d2f 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -611,14 +611,14 @@ ldt_ss:   * compensating for the offset by changing to the ESPFIX segment with   * a base address that matches for the difference.   */ +#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)  	mov %esp, %edx			/* load kernel esp */  	mov PT_OLDESP(%esp), %eax	/* load userspace esp */  	mov %dx, %ax			/* eax: new kernel esp */  	sub %eax, %edx			/* offset (low word is 0) */ -	PER_CPU(gdt_page, %ebx)  	shr $16, %edx -	mov %dl, GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx) /* bits 16..23 */ -	mov %dh, GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx) /* bits 24..31 */ +	mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */ +	mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */  	pushl $__ESPFIX_SS  	CFI_ADJUST_CFA_OFFSET 4  	push %eax			/* new kernel esp */ @@ -791,9 +791,8 @@ ptregs_clone:   * normal stack and adjusts ESP with the matching offset.   
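 *
 * (Editor's sketch, not part of the patch: in C terms the recovery below
 * is the inverse of the setup above --
 *
 *	base_hi16 = (desc_byte4 << 16) | (desc_byte7 << 24);
 *	kernel_sp = base_hi16 + esp;
 *
 * where desc_byte4 and desc_byte7 are invented names for bytes 4 and 7
 * of the GDT_ENTRY_ESPFIX_SS descriptor written on the way out.)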
*/  	/* fixup the stack */ -	PER_CPU(gdt_page, %ebx) -	mov GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx), %al /* bits 16..23 */ -	mov GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx), %ah /* bits 24..31 */ +	mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */ +	mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */  	shl $16, %eax  	addl %esp, %eax			/* the adjusted stack pointer */  	pushl $__KERNEL_DS @@ -914,7 +913,7 @@ ENTRY(simd_coprocessor_error)  	.balign 4  	.long 661b  	.long 663f -	.byte X86_FEATURE_XMM +	.word X86_FEATURE_XMM  	.byte 662b-661b  	.byte 664f-663f  .previous @@ -1166,6 +1165,9 @@ ENTRY(xen_failsafe_callback)  .previous  ENDPROC(xen_failsafe_callback) +BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK, +		xen_evtchn_do_upcall) +  #endif	/* CONFIG_XEN */  #ifdef CONFIG_FUNCTION_TRACER diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 4db7c4d12ffa..17be5ec7cbba 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1065,6 +1065,7 @@ ENTRY(\sym)  END(\sym)  .endm +#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)  .macro paranoidzeroentry_ist sym do_sym ist  ENTRY(\sym)  	INTR_FRAME @@ -1076,10 +1077,9 @@ ENTRY(\sym)  	TRACE_IRQS_OFF  	movq %rsp,%rdi		/* pt_regs pointer */  	xorl %esi,%esi		/* no error code */ -	PER_CPU(init_tss, %r12) -	subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12) +	subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)  	call \do_sym -	addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12) +	addq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)  	jmp paranoid_exit	/* %ebx: no swapgs flag */  	CFI_ENDPROC  END(\sym) @@ -1185,13 +1185,13 @@ END(kernel_thread_helper)   * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.   *   * C extern interface: - *	 extern long execve(char *name, char **argv, char **envp) + *	 extern long execve(const char *name, char **argv, char **envp)   *   * asm input arguments:   *	rdi: name, rsi: argv, rdx: envp   *   * We want to fallback into: - *	extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs) + *	extern long sys_execve(const char *name, char **argv,char **envp, struct pt_regs *regs)   *   * do_sys_execve asm fallback arguments:   *	rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack @@ -1329,6 +1329,9 @@ ENTRY(xen_failsafe_callback)  	CFI_ENDPROC  END(xen_failsafe_callback) +apicinterrupt XEN_HVM_EVTCHN_CALLBACK \ +	xen_hvm_callback_vector xen_evtchn_do_upcall +  #endif /* CONFIG_XEN */  /* diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index b2e246037392..784360c0625c 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -20,7 +20,7 @@  static void __init i386_default_early_setup(void)  { -	/* Initilize 32bit specific setup functions */ +	/* Initialize 32bit specific setup functions */  	x86_init.resources.probe_roms = probe_roms;  	x86_init.resources.reserve_resources = i386_reserve_resources;  	x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc; diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 37c3d4b17d85..ff4c453e13f3 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -131,6 +131,12 @@ ENTRY(startup_32)  	movsl  1: +#ifdef CONFIG_OLPC_OPENFIRMWARE +	/* save OFW's pgdir table for later use when calling into OFW */ +	movl %cr3, %eax +	movl %eax, pa(olpc_ofw_pgd) +#endif +  #ifdef CONFIG_PARAVIRT  	/* This is can only trip for a broken bootloader... 
*/  	cmpw $0x207, pa(boot_params + BP_version) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 3d1e6f16b7a6..239046bd447f 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -234,9 +234,8 @@ ENTRY(secondary_startup_64)  	 * init data section till per cpu areas are set up.  	 */  	movl	$MSR_GS_BASE,%ecx -	movq	initial_gs(%rip),%rax -	movq    %rax,%rdx -	shrq	$32,%rdx +	movl	initial_gs(%rip),%eax +	movl	initial_gs+4(%rip),%edx  	wrmsr	  	/* esi is pointer to real mode structure with interesting info. diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index ba390d731175..351f9c0fea1f 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -16,7 +16,6 @@  #include <asm/hpet.h>  #define HPET_MASK			CLOCKSOURCE_MASK(32) -#define HPET_SHIFT			22  /* FSEC = 10^-15     NSEC = 10^-9 */ @@ -583,7 +582,7 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)  	 * scaled math multiplication factor for nanosecond to hpet tick  	 * conversion.  	 */ -	hpet_freq = 1000000000000000ULL; +	hpet_freq = FSEC_PER_SEC;  	do_div(hpet_freq, hpet_period);  	evt->mult = div_sc((unsigned long) hpet_freq,  				      NSEC_PER_SEC, evt->shift); @@ -787,7 +786,6 @@ static struct clocksource clocksource_hpet = {  	.rating		= 250,  	.read		= read_hpet,  	.mask		= HPET_MASK, -	.shift		= HPET_SHIFT,  	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,  	.resume		= hpet_resume_counter,  #ifdef CONFIG_X86_64 @@ -798,6 +796,7 @@ static struct clocksource clocksource_hpet = {  static int hpet_clocksource_register(void)  {  	u64 start, now; +	u64 hpet_freq;  	cycle_t t1;  	/* Start the counter */ @@ -832,9 +831,15 @@ static int hpet_clocksource_register(void)  	 *  mult = (hpet_period * 2^shift)/10^6  	 *  mult = (hpet_period << shift)/FSEC_PER_NSEC  	 */ -	clocksource_hpet.mult = div_sc(hpet_period, FSEC_PER_NSEC, HPET_SHIFT); -	clocksource_register(&clocksource_hpet); +	/* Need to convert hpet_period (fsec/cyc) to cyc/sec: +	 * +	 * cyc/sec = FSEC_PER_SEC/hpet_period(fsec/cyc) +	 * cyc/sec = (FSEC_PER_NSEC * NSEC_PER_SEC)/hpet_period +	 */ +	hpet_freq = FSEC_PER_SEC; +	do_div(hpet_freq, hpet_period); +	clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq);  	return 0;  } diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index a8f1b803d2fd..a474ec37c32f 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -208,6 +208,9 @@ int arch_bp_generic_fields(int x86_len, int x86_type,  {  	/* Len */  	switch (x86_len) { +	case X86_BREAKPOINT_LEN_X: +		*gen_len = sizeof(long); +		break;  	case X86_BREAKPOINT_LEN_1:  		*gen_len = HW_BREAKPOINT_LEN_1;  		break; @@ -251,6 +254,29 @@ static int arch_build_bp_info(struct perf_event *bp)  	info->address = bp->attr.bp_addr; +	/* Type */ +	switch (bp->attr.bp_type) { +	case HW_BREAKPOINT_W: +		info->type = X86_BREAKPOINT_WRITE; +		break; +	case HW_BREAKPOINT_W | HW_BREAKPOINT_R: +		info->type = X86_BREAKPOINT_RW; +		break; +	case HW_BREAKPOINT_X: +		info->type = X86_BREAKPOINT_EXECUTE; +		/* +		 * x86 inst breakpoints need to have a specific undefined len. +		 * But we still need to check userspace is not trying to setup +		 * an unsupported length, to get a range breakpoint for example. 
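		 *
		 * (Editor's illustration, not part of the patch: a valid
		 * execute breakpoint request is therefore expected to carry
		 *
		 *	attr.bp_type = HW_BREAKPOINT_X;
		 *	attr.bp_len  = sizeof(long);
		 *
		 * and any other length, e.g. one meant to fake a range
		 * breakpoint, falls through to the -EINVAL below.)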
+		 */ +		if (bp->attr.bp_len == sizeof(long)) { +			info->len = X86_BREAKPOINT_LEN_X; +			return 0; +		} +	default: +		return -EINVAL; +	} +  	/* Len */  	switch (bp->attr.bp_len) {  	case HW_BREAKPOINT_LEN_1: @@ -271,21 +297,6 @@ static int arch_build_bp_info(struct perf_event *bp)  		return -EINVAL;  	} -	/* Type */ -	switch (bp->attr.bp_type) { -	case HW_BREAKPOINT_W: -		info->type = X86_BREAKPOINT_WRITE; -		break; -	case HW_BREAKPOINT_W | HW_BREAKPOINT_R: -		info->type = X86_BREAKPOINT_RW; -		break; -	case HW_BREAKPOINT_X: -		info->type = X86_BREAKPOINT_EXECUTE; -		break; -	default: -		return -EINVAL; -	} -  	return 0;  }  /* @@ -305,6 +316,9 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)  	ret = -EINVAL;  	switch (info->len) { +	case X86_BREAKPOINT_LEN_X: +		align = sizeof(long) -1; +		break;  	case X86_BREAKPOINT_LEN_1:  		align = 0;  		break; @@ -466,6 +480,13 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)  		perf_bp_event(bp, args->regs); +		/* +		 * Set up resume flag to avoid breakpoint recursion when +		 * returning to the origin. +		 */ +		if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE) +			args->regs->flags |= X86_EFLAGS_RF; +  		rcu_read_unlock();  	}  	/* diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 86cef6b32253..1f11f5ce668f 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -59,18 +59,18 @@ void __cpuinit mxcsr_feature_mask_init(void)  	stts();  } -void __cpuinit init_thread_xstate(void) +static void __cpuinit init_thread_xstate(void)  { +	/* +	 * Note that xstate_size might be overwritten later during +	 * xsave_init(). +	 */ +  	if (!HAVE_HWFP) {  		xstate_size = sizeof(struct i387_soft_struct);  		return;  	} -	if (cpu_has_xsave) { -		xsave_cntxt_init(); -		return; -	} -  	if (cpu_has_fxsr)  		xstate_size = sizeof(struct i387_fxsave_struct);  #ifdef CONFIG_X86_32 @@ -84,6 +84,7 @@ void __cpuinit init_thread_xstate(void)   * Called at bootup to set up the initial FPU state that is later cloned   * into all processes.   */ +  void __cpuinit fpu_init(void)  {  	unsigned long oldcr0 = read_cr0(); @@ -93,21 +94,26 @@ void __cpuinit fpu_init(void)  	write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */ -	/* -	 * Boot processor to setup the FP and extended state context info. 
-	 */  	if (!smp_processor_id())  		init_thread_xstate(); -	xsave_init();  	mxcsr_feature_mask_init();  	/* clean state in init */  	current_thread_info()->status = 0;  	clear_used_math();  } -#endif	/* CONFIG_X86_64 */ -static void fpu_finit(struct fpu *fpu) +#else	/* CONFIG_X86_64 */ + +void __cpuinit fpu_init(void) +{ +	if (!smp_processor_id()) +		init_thread_xstate(); +} + +#endif	/* CONFIG_X86_32 */ + +void fpu_finit(struct fpu *fpu)  {  #ifdef CONFIG_X86_32  	if (!HAVE_HWFP) { @@ -132,6 +138,7 @@ static void fpu_finit(struct fpu *fpu)  		fp->fos = 0xffff0000u;  	}  } +EXPORT_SYMBOL_GPL(fpu_finit);  /*   * The _current_ task is using the FPU for the first time @@ -190,6 +197,8 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,  	if (ret)  		return ret; +	sanitize_i387_state(target); +  	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,  				   &target->thread.fpu.state->fxsave, 0, -1);  } @@ -207,6 +216,8 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,  	if (ret)  		return ret; +	sanitize_i387_state(target); +  	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,  				 &target->thread.fpu.state->fxsave, 0, -1); @@ -446,6 +457,8 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,  					   -1);  	} +	sanitize_i387_state(target); +  	if (kbuf && pos == 0 && count == sizeof(env)) {  		convert_from_fxsr(kbuf, target);  		return 0; @@ -467,6 +480,8 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,  	if (ret)  		return ret; +	sanitize_i387_state(target); +  	if (!HAVE_HWFP)  		return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); @@ -533,6 +548,9 @@ static int save_i387_xsave(void __user *buf)  	struct _fpstate_ia32 __user *fx = buf;  	int err = 0; + +	sanitize_i387_state(tsk); +  	/*  	 * For legacy compatible, we always set FP/SSE bits in the bit  	 * vector while saving the state to the user context. diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 01ab17ae2ae7..ef10940e1af0 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -49,55 +49,94 @@  #include <asm/system.h>  #include <asm/apic.h> -/** - *	pt_regs_to_gdb_regs - Convert ptrace regs to GDB regs - *	@gdb_regs: A pointer to hold the registers in the order GDB wants. - *	@regs: The &struct pt_regs of the current process. - * - *	Convert the pt_regs in @regs into the format for registers that - *	GDB expects, stored in @gdb_regs. 
- */ -void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) +struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =  { -#ifndef CONFIG_X86_32 -	u32 *gdb_regs32 = (u32 *)gdb_regs; +#ifdef CONFIG_X86_32 +	{ "ax", 4, offsetof(struct pt_regs, ax) }, +	{ "cx", 4, offsetof(struct pt_regs, cx) }, +	{ "dx", 4, offsetof(struct pt_regs, dx) }, +	{ "bx", 4, offsetof(struct pt_regs, bx) }, +	{ "sp", 4, offsetof(struct pt_regs, sp) }, +	{ "bp", 4, offsetof(struct pt_regs, bp) }, +	{ "si", 4, offsetof(struct pt_regs, si) }, +	{ "di", 4, offsetof(struct pt_regs, di) }, +	{ "ip", 4, offsetof(struct pt_regs, ip) }, +	{ "flags", 4, offsetof(struct pt_regs, flags) }, +	{ "cs", 4, offsetof(struct pt_regs, cs) }, +	{ "ss", 4, offsetof(struct pt_regs, ss) }, +	{ "ds", 4, offsetof(struct pt_regs, ds) }, +	{ "es", 4, offsetof(struct pt_regs, es) }, +	{ "fs", 4, -1 }, +	{ "gs", 4, -1 }, +#else +	{ "ax", 8, offsetof(struct pt_regs, ax) }, +	{ "bx", 8, offsetof(struct pt_regs, bx) }, +	{ "cx", 8, offsetof(struct pt_regs, cx) }, +	{ "dx", 8, offsetof(struct pt_regs, dx) }, +	{ "si", 8, offsetof(struct pt_regs, si) }, +	{ "di", 8, offsetof(struct pt_regs, di) }, +	{ "bp", 8, offsetof(struct pt_regs, bp) }, +	{ "sp", 8, offsetof(struct pt_regs, sp) }, +	{ "r8", 8, offsetof(struct pt_regs, r8) }, +	{ "r9", 8, offsetof(struct pt_regs, r9) }, +	{ "r10", 8, offsetof(struct pt_regs, r10) }, +	{ "r11", 8, offsetof(struct pt_regs, r11) }, +	{ "r12", 8, offsetof(struct pt_regs, r12) }, +	{ "r13", 8, offsetof(struct pt_regs, r13) }, +	{ "r14", 8, offsetof(struct pt_regs, r14) }, +	{ "r15", 8, offsetof(struct pt_regs, r15) }, +	{ "ip", 8, offsetof(struct pt_regs, ip) }, +	{ "flags", 4, offsetof(struct pt_regs, flags) }, +	{ "cs", 4, offsetof(struct pt_regs, cs) }, +	{ "ss", 4, offsetof(struct pt_regs, ss) },  #endif -	gdb_regs[GDB_AX]	= regs->ax; -	gdb_regs[GDB_BX]	= regs->bx; -	gdb_regs[GDB_CX]	= regs->cx; -	gdb_regs[GDB_DX]	= regs->dx; -	gdb_regs[GDB_SI]	= regs->si; -	gdb_regs[GDB_DI]	= regs->di; -	gdb_regs[GDB_BP]	= regs->bp; -	gdb_regs[GDB_PC]	= regs->ip; +}; + +int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) +{ +	if (  #ifdef CONFIG_X86_32 -	gdb_regs[GDB_PS]	= regs->flags; -	gdb_regs[GDB_DS]	= regs->ds; -	gdb_regs[GDB_ES]	= regs->es; -	gdb_regs[GDB_CS]	= regs->cs; -	gdb_regs[GDB_FS]	= 0xFFFF; -	gdb_regs[GDB_GS]	= 0xFFFF; -	if (user_mode_vm(regs)) { -		gdb_regs[GDB_SS] = regs->ss; -		gdb_regs[GDB_SP] = regs->sp; -	} else { -		gdb_regs[GDB_SS] = __KERNEL_DS; -		gdb_regs[GDB_SP] = kernel_stack_pointer(regs); +	    regno == GDB_SS || regno == GDB_FS || regno == GDB_GS || +#endif +	    regno == GDB_SP || regno == GDB_ORIG_AX) +		return 0; + +	if (dbg_reg_def[regno].offset != -1) +		memcpy((void *)regs + dbg_reg_def[regno].offset, mem, +		       dbg_reg_def[regno].size); +	return 0; +} + +char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) +{ +	if (regno == GDB_ORIG_AX) { +		memcpy(mem, &regs->orig_ax, sizeof(regs->orig_ax)); +		return "orig_ax";  	} -#else -	gdb_regs[GDB_R8]	= regs->r8; -	gdb_regs[GDB_R9]	= regs->r9; -	gdb_regs[GDB_R10]	= regs->r10; -	gdb_regs[GDB_R11]	= regs->r11; -	gdb_regs[GDB_R12]	= regs->r12; -	gdb_regs[GDB_R13]	= regs->r13; -	gdb_regs[GDB_R14]	= regs->r14; -	gdb_regs[GDB_R15]	= regs->r15; -	gdb_regs32[GDB_PS]	= regs->flags; -	gdb_regs32[GDB_CS]	= regs->cs; -	gdb_regs32[GDB_SS]	= regs->ss; -	gdb_regs[GDB_SP]	= kernel_stack_pointer(regs); +	if (regno >= DBG_MAX_REG_NUM || regno < 0) +		return NULL; + +	if (dbg_reg_def[regno].offset != -1) +		memcpy(mem, (void *)regs + 
dbg_reg_def[regno].offset, +		       dbg_reg_def[regno].size); + +	switch (regno) { +#ifdef CONFIG_X86_32 +	case GDB_SS: +		if (!user_mode_vm(regs)) +			*(unsigned long *)mem = __KERNEL_DS; +		break; +	case GDB_SP: +		if (!user_mode_vm(regs)) +			*(unsigned long *)mem = kernel_stack_pointer(regs); +		break; +	case GDB_GS: +	case GDB_FS: +		*(unsigned long *)mem = 0xFFFF; +		break;  #endif +	} +	return dbg_reg_def[regno].name;  }  /** @@ -150,54 +189,13 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)  	gdb_regs[GDB_SP]	= p->thread.sp;  } -/** - *	gdb_regs_to_pt_regs - Convert GDB regs to ptrace regs. - *	@gdb_regs: A pointer to hold the registers we've received from GDB. - *	@regs: A pointer to a &struct pt_regs to hold these values in. - * - *	Convert the GDB regs in @gdb_regs into the pt_regs, and store them - *	in @regs. - */ -void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) -{ -#ifndef CONFIG_X86_32 -	u32 *gdb_regs32 = (u32 *)gdb_regs; -#endif -	regs->ax		= gdb_regs[GDB_AX]; -	regs->bx		= gdb_regs[GDB_BX]; -	regs->cx		= gdb_regs[GDB_CX]; -	regs->dx		= gdb_regs[GDB_DX]; -	regs->si		= gdb_regs[GDB_SI]; -	regs->di		= gdb_regs[GDB_DI]; -	regs->bp		= gdb_regs[GDB_BP]; -	regs->ip		= gdb_regs[GDB_PC]; -#ifdef CONFIG_X86_32 -	regs->flags		= gdb_regs[GDB_PS]; -	regs->ds		= gdb_regs[GDB_DS]; -	regs->es		= gdb_regs[GDB_ES]; -	regs->cs		= gdb_regs[GDB_CS]; -#else -	regs->r8		= gdb_regs[GDB_R8]; -	regs->r9		= gdb_regs[GDB_R9]; -	regs->r10		= gdb_regs[GDB_R10]; -	regs->r11		= gdb_regs[GDB_R11]; -	regs->r12		= gdb_regs[GDB_R12]; -	regs->r13		= gdb_regs[GDB_R13]; -	regs->r14		= gdb_regs[GDB_R14]; -	regs->r15		= gdb_regs[GDB_R15]; -	regs->flags		= gdb_regs32[GDB_PS]; -	regs->cs		= gdb_regs32[GDB_CS]; -	regs->ss		= gdb_regs32[GDB_SS]; -#endif -} -  static struct hw_breakpoint {  	unsigned		enabled;  	unsigned long		addr;  	int			len;  	int			type;  	struct perf_event	**pev; -} breakinfo[4]; +} breakinfo[HBP_NUM];  static unsigned long early_dr7; @@ -205,7 +203,7 @@ static void kgdb_correct_hw_break(void)  {  	int breakno; -	for (breakno = 0; breakno < 4; breakno++) { +	for (breakno = 0; breakno < HBP_NUM; breakno++) {  		struct perf_event *bp;  		struct arch_hw_breakpoint *info;  		int val; @@ -292,10 +290,10 @@ kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)  {  	int i; -	for (i = 0; i < 4; i++) +	for (i = 0; i < HBP_NUM; i++)  		if (breakinfo[i].addr == addr && breakinfo[i].enabled)  			break; -	if (i == 4) +	if (i == HBP_NUM)  		return -1;  	if (hw_break_release_slot(i)) { @@ -313,7 +311,7 @@ static void kgdb_remove_all_hw_break(void)  	int cpu = raw_smp_processor_id();  	struct perf_event *bp; -	for (i = 0; i < 4; i++) { +	for (i = 0; i < HBP_NUM; i++) {  		if (!breakinfo[i].enabled)  			continue;  		bp = *per_cpu_ptr(breakinfo[i].pev, cpu); @@ -333,10 +331,10 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)  {  	int i; -	for (i = 0; i < 4; i++) +	for (i = 0; i < HBP_NUM; i++)  		if (!breakinfo[i].enabled)  			break; -	if (i == 4) +	if (i == HBP_NUM)  		return -1;  	switch (bptype) { @@ -397,7 +395,7 @@ void kgdb_disable_hw_debug(struct pt_regs *regs)  	/* Disable hardware debugging while we are in kgdb: */  	set_debugreg(0UL, 7); -	for (i = 0; i < 4; i++) { +	for (i = 0; i < HBP_NUM; i++) {  		if (!breakinfo[i].enabled)  			continue;  		if (dbg_is_early) { @@ -458,7 +456,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,  {  	unsigned long addr;  	char *ptr; -	int 
newPC;  	switch (remcomInBuffer[0]) {  	case 'c': @@ -469,8 +466,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,  			linux_regs->ip = addr;  	case 'D':  	case 'k': -		newPC = linux_regs->ip; -  		/* clear the trace bit */  		linux_regs->flags &= ~X86_EFLAGS_TF;  		atomic_set(&kgdb_cpu_doing_single_step, -1); @@ -645,7 +640,7 @@ void kgdb_arch_late(void)  	attr.bp_len = HW_BREAKPOINT_LEN_1;  	attr.bp_type = HW_BREAKPOINT_W;  	attr.disabled = 1; -	for (i = 0; i < 4; i++) { +	for (i = 0; i < HBP_NUM; i++) {  		if (breakinfo[i].pev)  			continue;  		breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL); diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 675879b65ce6..1bfb6cf4dd55 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -126,16 +126,22 @@ static void __kprobes synthesize_reljump(void *from, void *to)  }  /* - * Check for the REX prefix which can only exist on X86_64 - * X86_32 always returns 0 + * Skip the prefixes of the instruction.   */ -static int __kprobes is_REX_prefix(kprobe_opcode_t *insn) +static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn)  { +	insn_attr_t attr; + +	attr = inat_get_opcode_attribute((insn_byte_t)*insn); +	while (inat_is_legacy_prefix(attr)) { +		insn++; +		attr = inat_get_opcode_attribute((insn_byte_t)*insn); +	}  #ifdef CONFIG_X86_64 -	if ((*insn & 0xf0) == 0x40) -		return 1; +	if (inat_is_rex_prefix(attr)) +		insn++;  #endif -	return 0; +	return insn;  }  /* @@ -272,6 +278,9 @@ static int __kprobes can_probe(unsigned long paddr)   */  static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)  { +	/* Skip prefixes */ +	insn = skip_prefixes(insn); +  	switch (*insn) {  	case 0xfa:		/* cli */  	case 0xfb:		/* sti */ @@ -280,13 +289,6 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)  		return 1;  	} -	/* -	 * on X86_64, 0x40-0x4f are REX prefixes so we need to look -	 * at the next byte instead.. 
but of course not recurse infinitely -	 */ -	if (is_REX_prefix(insn)) -		return is_IF_modifier(++insn); -  	return 0;  } @@ -803,9 +805,8 @@ static void __kprobes resume_execution(struct kprobe *p,  	unsigned long orig_ip = (unsigned long)p->addr;  	kprobe_opcode_t *insn = p->ainsn.insn; -	/*skip the REX prefix*/ -	if (is_REX_prefix(insn)) -		insn++; +	/* Skip prefixes */ +	insn = skip_prefixes(insn);  	regs->flags &= ~X86_EFLAGS_TF;  	switch (*insn) { diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index d86dbf7e54be..d7b6f7fb4fec 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -274,6 +274,18 @@ static void __init smp_dump_mptable(struct mpc_table *mpc, unsigned char *mpt)  void __init default_smp_read_mpc_oem(struct mpc_table *mpc) { } +static void __init smp_register_lapic_address(unsigned long address) +{ +	mp_lapic_addr = address; + +	set_fixmap_nocache(FIX_APIC_BASE, address); +	if (boot_cpu_physical_apicid == -1U) { +		boot_cpu_physical_apicid  = read_apic_id(); +		apic_version[boot_cpu_physical_apicid] = +			 GET_APIC_VERSION(apic_read(APIC_LVR)); +	} +} +  static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)  {  	char str[16]; @@ -295,6 +307,10 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)  	if (early)  		return 1; +	/* Initialize the lapic mapping */ +	if (!acpi_lapic) +		smp_register_lapic_address(mpc->lapic); +  	if (mpc->oemptr)  		x86_init.mpparse.smp_read_mpc_oem(mpc); diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c index 5915e0b33303..79ae68154e87 100644 --- a/arch/x86/kernel/mrst.c +++ b/arch/x86/kernel/mrst.c @@ -25,8 +25,34 @@  #include <asm/i8259.h>  #include <asm/apb_timer.h> +/* + * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock, + * cmdline option x86_mrst_timer can be used to override the configuration + * to prefer one or the other. + * at runtime, there are basically three timer configurations: + * 1. per cpu apbt clock only + * 2. per cpu always-on lapic clocks only, this is Penwell/Medfield only + * 3. per cpu lapic clock (C3STOP) and one apbt clock, with broadcast. + * + * by default (without cmdline option), platform code first detects cpu type + * to see if we are on lincroft or penwell, then set up both lapic or apbt + * clocks accordingly. + * i.e. by default, medfield uses configuration #2, moorestown uses #1. + * config #3 is supported but not recommended on medfield. + * + * rating and feature summary: + * lapic (with C3STOP) --------- 100 + * apbt (always-on) ------------ 110 + * lapic (always-on,ARAT) ------ 150 + */ + +__cpuinitdata enum mrst_timer_options mrst_timer_options; +  static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM];  static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM]; +enum mrst_cpu_type __mrst_cpu_chip; +EXPORT_SYMBOL_GPL(__mrst_cpu_chip); +  int sfi_mtimer_num;  struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX]; @@ -167,18 +193,6 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table)  	return 0;  } -/* - * the secondary clock in Moorestown can be APBT or LAPIC clock, default to - * APBT but cmdline option can also override it. 
- */ -static void __cpuinit mrst_setup_secondary_clock(void) -{ -	/* restore default lapic clock if disabled by cmdline */ -	if (disable_apbt_percpu) -		return setup_secondary_APIC_clock(); -	apbt_setup_secondary_clock(); -} -  static unsigned long __init mrst_calibrate_tsc(void)  {  	unsigned long flags, fast_calibrate; @@ -195,6 +209,21 @@ static unsigned long __init mrst_calibrate_tsc(void)  void __init mrst_time_init(void)  { +	switch (mrst_timer_options) { +	case MRST_TIMER_APBT_ONLY: +		break; +	case MRST_TIMER_LAPIC_APBT: +		x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock; +		x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock; +		break; +	default: +		if (!boot_cpu_has(X86_FEATURE_ARAT)) +			break; +		x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock; +		x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock; +		return; +	} +	/* we need at least one APB timer */  	sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);  	pre_init_apic_IRQ0();  	apbt_time_init(); @@ -205,16 +234,21 @@ void __init mrst_rtc_init(void)  	sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc);  } -/* - * if we use per cpu apb timer, the bootclock already setup. if we use lapic - * timer and one apbt timer for broadcast, we need to set up lapic boot clock. - */ -static void __init mrst_setup_boot_clock(void) +void __cpuinit mrst_arch_setup(void)  { -	pr_info("%s: per cpu apbt flag %d \n", __func__, disable_apbt_percpu); -	if (disable_apbt_percpu) -		setup_boot_APIC_clock(); -}; +	if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27) +		__mrst_cpu_chip = MRST_CPU_CHIP_PENWELL; +	else if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x26) +		__mrst_cpu_chip = MRST_CPU_CHIP_LINCROFT; +	else { +		pr_err("Unknown Moorestown CPU (%d:%d), default to Lincroft\n", +			boot_cpu_data.x86, boot_cpu_data.x86_model); +		__mrst_cpu_chip = MRST_CPU_CHIP_LINCROFT; +	} +	pr_debug("Moorestown CPU %s identified\n", +		(__mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT) ? +		"Lincroft" : "Penwell"); +}  /* MID systems don't have i8042 controller */  static int mrst_i8042_detect(void) @@ -232,11 +266,13 @@ void __init x86_mrst_early_setup(void)  	x86_init.resources.reserve_resources = x86_init_noop;  	x86_init.timers.timer_init = mrst_time_init; -	x86_init.timers.setup_percpu_clockev = mrst_setup_boot_clock; +	x86_init.timers.setup_percpu_clockev = x86_init_noop;  	x86_init.irqs.pre_vector_init = x86_init_noop; -	x86_cpuinit.setup_percpu_clockev = mrst_setup_secondary_clock; +	x86_init.oem.arch_setup = mrst_arch_setup; + +	x86_cpuinit.setup_percpu_clockev = apbt_setup_secondary_clock;  	x86_platform.calibrate_tsc = mrst_calibrate_tsc;  	x86_platform.i8042_detect = mrst_i8042_detect; @@ -250,3 +286,26 @@ void __init x86_mrst_early_setup(void)  	x86_init.mpparse.get_smp_config = x86_init_uint_noop;  } + +/* + * if user does not want to use per CPU apb timer, just give it a lower rating + * than local apic timer and skip the late per cpu timer init. 
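 *
 * (Editor's usage note, not part of the patch: with this hook in place
 * the kernel command line can carry, for example,
 *
 *	x86_mrst_timer=apbt_only
 *	x86_mrst_timer=lapic_and_apbt
 *
 * selecting MRST_TIMER_APBT_ONLY or MRST_TIMER_LAPIC_APBT respectively,
 * as consumed by mrst_time_init() above.)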
+static inline int __init setup_x86_mrst_timer(char *arg) +{ +	if (!arg) +		return -EINVAL; + +	if (strcmp("apbt_only", arg) == 0) +		mrst_timer_options = MRST_TIMER_APBT_ONLY; +	else if (strcmp("lapic_and_apbt", arg) == 0) +		mrst_timer_options = MRST_TIMER_LAPIC_APBT; +	else { +		pr_warning("X86 MRST timer option %s not recognised;" +			   " use x86_mrst_timer=apbt_only or lapic_and_apbt\n", +			   arg); +		return -EINVAL; +	} +	return 0; +} +__setup("x86_mrst_timer=", setup_x86_mrst_timer); diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c index 8297160c41b3..0e0cdde519be 100644 --- a/arch/x86/kernel/olpc.c +++ b/arch/x86/kernel/olpc.c @@ -21,10 +21,7 @@  #include <asm/geode.h>  #include <asm/setup.h>  #include <asm/olpc.h> - -#ifdef CONFIG_OPEN_FIRMWARE -#include <asm/ofw.h> -#endif +#include <asm/olpc_ofw.h>  struct olpc_platform_t olpc_platform_info;  EXPORT_SYMBOL_GPL(olpc_platform_info); @@ -145,7 +142,7 @@ restart:  	 * The OBF flag will sometimes misbehave due to what we believe  	 * is a hardware quirk..  	 */ -	printk(KERN_DEBUG "olpc-ec:  running cmd 0x%x\n", cmd); +	pr_devel("olpc-ec:  running cmd 0x%x\n", cmd);  	outb(cmd, 0x6c);  	if (wait_on_ibf(0x6c, 0)) { @@ -162,8 +159,7 @@ restart:  						" EC accept data!\n");  				goto err;  			} -			printk(KERN_DEBUG "olpc-ec:  sending cmd arg 0x%x\n", -					inbuf[i]); +			pr_devel("olpc-ec:  sending cmd arg 0x%x\n", inbuf[i]);  			outb(inbuf[i], 0x68);  		}  	} @@ -176,8 +172,7 @@ restart:  				goto restart;  			}  			outbuf[i] = inb(0x68); -			printk(KERN_DEBUG "olpc-ec:  received 0x%x\n", -					outbuf[i]); +			pr_devel("olpc-ec:  received 0x%x\n", outbuf[i]);  		}  	} @@ -188,14 +183,15 @@ err:  }  EXPORT_SYMBOL_GPL(olpc_ec_cmd); -#ifdef CONFIG_OPEN_FIRMWARE +#ifdef CONFIG_OLPC_OPENFIRMWARE  static void __init platform_detect(void)  {  	size_t propsize;  	__be32 rev; +	const void *args[] = { NULL, "board-revision-int", &rev, (void *)4 }; +	void *res[] = { &propsize }; -	if (ofw("getprop", 4, 1, NULL, "board-revision-int", &rev, 4, -			&propsize) || propsize != 4) { +	if (olpc_ofw("getprop", args, res) || propsize != 4) {  		printk(KERN_ERR "ofw: getprop call failed!\n");  		rev = cpu_to_be32(0);  	} diff --git a/arch/x86/kernel/olpc_ofw.c b/arch/x86/kernel/olpc_ofw.c new file mode 100644 index 000000000000..3218aa71ab5e --- /dev/null +++ b/arch/x86/kernel/olpc_ofw.c @@ -0,0 +1,106 @@ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <asm/page.h> +#include <asm/setup.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/olpc_ofw.h> + +/* address of OFW callback interface; will be NULL if OFW isn't found */ +static int (*olpc_ofw_cif)(int *); + +/* page dir entry containing OFW's pgdir table; filled in by head_32.S */ +u32 olpc_ofw_pgd __initdata; + +static DEFINE_SPINLOCK(ofw_lock); + +#define MAXARGS 10 + +void __init setup_olpc_ofw_pgd(void) +{ +	pgd_t *base, *ofw_pde; + +	if (!olpc_ofw_cif) +		return; + +	/* fetch OFW's PDE */ +	base = early_ioremap(olpc_ofw_pgd, sizeof(olpc_ofw_pgd) * PTRS_PER_PGD); +	if (!base) { +		printk(KERN_ERR "failed to remap OFW's pgd - disabling OFW!\n"); +		olpc_ofw_cif = NULL; +		return; +	} +	ofw_pde = &base[OLPC_OFW_PDE_NR]; + +	/* install OFW's PDE permanently into the kernel's pgtable */ +	set_pgd(&swapper_pg_dir[OLPC_OFW_PDE_NR], *ofw_pde); +	/* implicit optimization barrier here due to uninlined function return */ + +	early_iounmap(base, sizeof(olpc_ofw_pgd) * PTRS_PER_PGD); +} + +int __olpc_ofw(const char *name, int nr_args, 
const void **args, int nr_res, +		void **res) +{ +	int ofw_args[MAXARGS + 3]; +	unsigned long flags; +	int ret, i, *p; + +	BUG_ON(nr_args + nr_res > MAXARGS); + +	if (!olpc_ofw_cif) +		return -EIO; + +	ofw_args[0] = (int)name; +	ofw_args[1] = nr_args; +	ofw_args[2] = nr_res; + +	p = &ofw_args[3]; +	for (i = 0; i < nr_args; i++, p++) +		*p = (int)args[i]; + +	/* call into ofw */ +	spin_lock_irqsave(&ofw_lock, flags); +	ret = olpc_ofw_cif(ofw_args); +	spin_unlock_irqrestore(&ofw_lock, flags); + +	if (!ret) { +		for (i = 0; i < nr_res; i++, p++) +			*((int *)res[i]) = *p; +	} + +	return ret; +} +EXPORT_SYMBOL_GPL(__olpc_ofw); + +/* OFW cif _should_ be above this address */ +#define OFW_MIN 0xff000000 + +/* OFW starts on a 1MB boundary */ +#define OFW_BOUND (1<<20) + +void __init olpc_ofw_detect(void) +{ +	struct olpc_ofw_header *hdr = &boot_params.olpc_ofw_header; +	unsigned long start; + +	/* ensure OFW booted us by checking for "OFW " string */ +	if (hdr->ofw_magic != OLPC_OFW_SIG) +		return; + +	olpc_ofw_cif = (int (*)(int *))hdr->cif_handler; + +	if ((unsigned long)olpc_ofw_cif < OFW_MIN) { +		printk(KERN_ERR "OFW detected, but cif has invalid address 0x%lx - disabling.\n", +				(unsigned long)olpc_ofw_cif); +		olpc_ofw_cif = NULL; +		return; +	} + +	/* determine where OFW starts in memory */ +	start = round_down((unsigned long)olpc_ofw_cif, OFW_BOUND); +	printk(KERN_INFO "OFW detected in memory, cif @ 0x%lx (reserving top %ldMB)\n", +			(unsigned long)olpc_ofw_cif, (-start) >> 20); +	reserve_top_address(-start); +} diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 4b7e3d8b01dd..9f07cfcbd3a5 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -13,6 +13,7 @@  #include <asm/calgary.h>  #include <asm/amd_iommu.h>  #include <asm/x86_init.h> +#include <asm/xen/swiotlb-xen.h>  static int forbid_dac __read_mostly; @@ -132,7 +133,7 @@ void __init pci_iommu_alloc(void)  	/* free the range so iommu could get some range less than 4G */  	dma32_free_bootmem(); -	if (pci_swiotlb_detect()) +	if (pci_xen_swiotlb_detect() || pci_swiotlb_detect())  		goto out;  	gart_iommu_hole_init(); @@ -144,6 +145,8 @@ void __init pci_iommu_alloc(void)  	/* needs to be called after gart_iommu_hole_init */  	amd_iommu_detect();  out: +	pci_xen_swiotlb_init(); +  	pci_swiotlb_init();  } @@ -296,7 +299,7 @@ static int __init pci_iommu_init(void)  #endif  	x86_init.iommu.iommu_init(); -	if (swiotlb) { +	if (swiotlb || xen_swiotlb) {  		printk(KERN_INFO "PCI-DMA: "  		       "Using software bounce buffering for IO (SWIOTLB)\n");  		swiotlb_print_info(); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e7e35219b32f..64ecaf0af9af 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -28,6 +28,7 @@ unsigned long idle_nomwait;  EXPORT_SYMBOL(idle_nomwait);  struct kmem_cache *task_xstate_cachep; +EXPORT_SYMBOL_GPL(task_xstate_cachep);  int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)  { @@ -300,7 +301,7 @@ EXPORT_SYMBOL(kernel_thread);  /*   * sys_execve() executes a new program.   
*/ -long sys_execve(char __user *name, char __user * __user *argv, +long sys_execve(const char __user *name, char __user * __user *argv,  		char __user * __user *envp, struct pt_regs *regs)  {  	long error; @@ -371,7 +372,7 @@ static inline int hlt_use_halt(void)  void default_idle(void)  {  	if (hlt_use_halt()) { -		trace_power_start(POWER_CSTATE, 1); +		trace_power_start(POWER_CSTATE, 1, smp_processor_id());  		current_thread_info()->status &= ~TS_POLLING;  		/*  		 * TS_POLLING-cleared state must be visible before we @@ -441,7 +442,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);   */  void mwait_idle_with_hints(unsigned long ax, unsigned long cx)  { -	trace_power_start(POWER_CSTATE, (ax>>4)+1); +	trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id());  	if (!need_resched()) {  		if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))  			clflush((void *)¤t_thread_info()->flags); @@ -457,7 +458,7 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx)  static void mwait_idle(void)  {  	if (!need_resched()) { -		trace_power_start(POWER_CSTATE, 1); +		trace_power_start(POWER_CSTATE, 1, smp_processor_id());  		if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))  			clflush((void *)¤t_thread_info()->flags); @@ -478,7 +479,7 @@ static void mwait_idle(void)   */  static void poll_idle(void)  { -	trace_power_start(POWER_CSTATE, 0); +	trace_power_start(POWER_CSTATE, 0, smp_processor_id());  	local_irq_enable();  	while (!need_resched())  		cpu_relax(); @@ -525,44 +526,10 @@ static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)  	return (edx & MWAIT_EDX_C1);  } -/* - * Check for AMD CPUs, where APIC timer interrupt does not wake up CPU from C1e. - * For more information see - * - Erratum #400 for NPT family 0xf and family 0x10 CPUs - * - Erratum #365 for family 0x11 (not affected because C1e not in use) - */ -static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) -{ -	u64 val; -	if (c->x86_vendor != X86_VENDOR_AMD) -		goto no_c1e_idle; - -	/* Family 0x0f models < rev F do not have C1E */ -	if (c->x86 == 0x0F && c->x86_model >= 0x40) -		return 1; - -	if (c->x86 == 0x10) { -		/* -		 * check OSVW bit for CPUs that are not affected -		 * by erratum #400 -		 */ -		if (cpu_has(c, X86_FEATURE_OSVW)) { -			rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, val); -			if (val >= 2) { -				rdmsrl(MSR_AMD64_OSVW_STATUS, val); -				if (!(val & BIT(1))) -					goto no_c1e_idle; -			} -		} -		return 1; -	} - -no_c1e_idle: -	return 0; -} +bool c1e_detected; +EXPORT_SYMBOL(c1e_detected);  static cpumask_var_t c1e_mask; -static int c1e_detected;  void c1e_remove_cpu(int cpu)  { @@ -584,12 +551,12 @@ static void c1e_idle(void)  		u32 lo, hi;  		rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); +  		if (lo & K8_INTP_C1E_ACTIVE_MASK) { -			c1e_detected = 1; +			c1e_detected = true;  			if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))  				mark_tsc_unstable("TSC halt in AMD C1E");  			printk(KERN_INFO "System has AMD C1E enabled\n"); -			set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E);  		}  	} @@ -638,7 +605,8 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)  		 */  		printk(KERN_INFO "using mwait in idle threads.\n");  		pm_idle = mwait_idle; -	} else if (check_c1e_idle(c)) { +	} else if (cpu_has_amd_erratum(amd_erratum_400)) { +		/* E400: APIC timer interrupt does not wake up CPU from C1e */  		printk(KERN_INFO "using C1E aware idle routine\n");  		pm_idle = c1e_idle;  	} else diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 8d128783af47..96586c3cbbbf 100644 --- 
a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -57,6 +57,8 @@  #include <asm/syscalls.h>  #include <asm/debugreg.h> +#include <trace/events/power.h> +  asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");  /* @@ -111,6 +113,8 @@ void cpu_idle(void)  			stop_critical_timings();  			pm_idle();  			start_critical_timings(); + +			trace_power_end(smp_processor_id());  		}  		tick_nohz_restart_sched_tick();  		preempt_enable_no_resched(); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3c2422a99f1f..3d9ea531ddd1 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -51,6 +51,8 @@  #include <asm/syscalls.h>  #include <asm/debugreg.h> +#include <trace/events/power.h> +  asmlinkage extern void ret_from_fork(void);  DEFINE_PER_CPU(unsigned long, old_rsp); @@ -138,6 +140,9 @@ void cpu_idle(void)  			stop_critical_timings();  			pm_idle();  			start_critical_timings(); + +			trace_power_end(smp_processor_id()); +  			/* In many cases the interrupt that ended idle  			   has already called exit_idle. But some idle  			   loops can be woken up without interrupt. */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b4ae4acbd031..b008e7883207 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -102,6 +102,7 @@  #include <asm/paravirt.h>  #include <asm/hypervisor.h> +#include <asm/olpc_ofw.h>  #include <asm/percpu.h>  #include <asm/topology.h> @@ -736,10 +737,15 @@ void __init setup_arch(char **cmdline_p)  	/* VMI may relocate the fixmap; do this before touching ioremap area */  	vmi_init(); +	/* OFW also may relocate the fixmap */ +	olpc_ofw_detect(); +  	early_trap_init();  	early_cpu_init();  	early_ioremap_init(); +	setup_olpc_ofw_pgd(); +  	ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);  	screen_info = boot_params.screen_info;  	edid_info = boot_params.edid_info; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c4f33b2e77d6..a5e928b0cb5f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -735,12 +735,8 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)  		goto do_rest;  	} -	if (!keventd_up() || current_is_keventd()) -		c_idle.work.func(&c_idle.work); -	else { -		schedule_work(&c_idle.work); -		wait_for_completion(&c_idle.done); -	} +	schedule_work(&c_idle.work); +	wait_for_completion(&c_idle.done);  	if (IS_ERR(c_idle.idle)) {  		printk("failed fork for CPU %d\n", cpu); @@ -816,6 +812,13 @@ do_rest:  			if (cpumask_test_cpu(cpu, cpu_callin_mask))  				break;	/* It has booted */  			udelay(100); +			/* +			 * Allow other tasks to run while we wait for the +			 * AP to come online. This also gives a chance +			 * for the MTRR work (triggered by the AP coming online) +			 * to be completed in the stop machine context. 
+			 */ +			schedule();  		}  		if (cpumask_test_cpu(cpu, cpu_callin_mask)) diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 922eefbb3f6c..b53c525368a7 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -23,11 +23,16 @@ static int save_stack_stack(void *data, char *name)  	return 0;  } -static void save_stack_address(void *data, unsigned long addr, int reliable) +static void +__save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched)  {  	struct stack_trace *trace = data; +#ifdef CONFIG_FRAME_POINTER  	if (!reliable)  		return; +#endif +	if (nosched && in_sched_functions(addr)) +		return;  	if (trace->skip > 0) {  		trace->skip--;  		return; @@ -36,20 +41,15 @@ static void save_stack_address(void *data, unsigned long addr, int reliable)  		trace->entries[trace->nr_entries++] = addr;  } +static void save_stack_address(void *data, unsigned long addr, int reliable) +{ +	return __save_stack_address(data, addr, reliable, false); +} +  static void  save_stack_address_nosched(void *data, unsigned long addr, int reliable)  { -	struct stack_trace *trace = (struct stack_trace *)data; -	if (!reliable) -		return; -	if (in_sched_functions(addr)) -		return; -	if (trace->skip > 0) { -		trace->skip--; -		return; -	} -	if (trace->nr_entries < trace->max_entries) -		trace->entries[trace->nr_entries++] = addr; +	return __save_stack_address(data, addr, reliable, true);  }  static const struct stacktrace_ops save_stack_ops = { @@ -96,12 +96,13 @@ EXPORT_SYMBOL_GPL(save_stack_trace_tsk);  /* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */ -struct stack_frame { +struct stack_frame_user {  	const void __user	*next_fp;  	unsigned long		ret_addr;  }; -static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) +static int +copy_stack_frame(const void __user *fp, struct stack_frame_user *frame)  {  	int ret; @@ -126,7 +127,7 @@ static inline void __save_stack_trace_user(struct stack_trace *trace)  		trace->entries[trace->nr_entries++] = regs->ip;  	while (trace->nr_entries < trace->max_entries) { -		struct stack_frame frame; +		struct stack_frame_user frame;  		frame.next_fp = NULL;  		frame.ret_addr = 0; diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index 8b3729341216..b35786dc9b8f 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -337,3 +337,6 @@ ENTRY(sys_call_table)  	.long sys_rt_tgsigqueueinfo	/* 335 */  	.long sys_perf_event_open  	.long sys_recvmmsg +	.long sys_fanotify_init +	.long sys_fanotify_mark +	.long sys_prlimit64		/* 340 */ diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 7fea555929e2..312ef0292815 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -8,6 +8,7 @@   */  #include <linux/seq_file.h>  #include <linux/proc_fs.h> +#include <linux/debugfs.h>  #include <linux/kernel.h>  #include <linux/slab.h> @@ -22,19 +23,37 @@  #include <asm/irq_vectors.h>  #include <asm/timer.h> -struct msg_desc { -	struct bau_payload_queue_entry *msg; -	int msg_slot; -	int sw_ack_slot; -	struct bau_payload_queue_entry *va_queue_first; -	struct bau_payload_queue_entry *va_queue_last; +/* timeouts in nanoseconds (indexed by UVH_AGING_PRESCALE_SEL urgency7 30:28) */ +static int timeout_base_ns[] = { +		20, +		160, +		1280, +		10240, +		81920, +		655360, +		5242880, +		167772160  }; - -#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD	0x000000000bUL - -static int uv_bau_max_concurrent __read_mostly; 
- +static int timeout_us;  static int nobau; +static int baudisabled; +static spinlock_t disable_lock; +static cycles_t congested_cycles; + +/* tunables: */ +static int max_bau_concurrent = MAX_BAU_CONCURRENT; +static int max_bau_concurrent_constant = MAX_BAU_CONCURRENT; +static int plugged_delay = PLUGGED_DELAY; +static int plugsb4reset = PLUGSB4RESET; +static int timeoutsb4reset = TIMEOUTSB4RESET; +static int ipi_reset_limit = IPI_RESET_LIMIT; +static int complete_threshold = COMPLETE_THRESHOLD; +static int congested_response_us = CONGESTED_RESPONSE_US; +static int congested_reps = CONGESTED_REPS; +static int congested_period = CONGESTED_PERIOD; +static struct dentry *tunables_dir; +static struct dentry *tunables_file; +  static int __init setup_nobau(char *arg)  {  	nobau = 1; @@ -52,10 +71,6 @@ static DEFINE_PER_CPU(struct ptc_stats, ptcstats);  static DEFINE_PER_CPU(struct bau_control, bau_control);  static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); -struct reset_args { -	int sender; -}; -  /*   * Determine the first node on a uvhub. 'Nodes' are used for kernel   * memory allocation. @@ -126,7 +141,7 @@ static inline void uv_bau_process_retry_msg(struct msg_desc *mdp,  	struct ptc_stats *stat;  	msg = mdp->msg; -	stat = &per_cpu(ptcstats, bcp->cpu); +	stat = bcp->statp;  	stat->d_retries++;  	/*  	 * cancel any message from msg+1 to the retry itself @@ -146,15 +161,14 @@ static inline void uv_bau_process_retry_msg(struct msg_desc *mdp,  			slot2 = msg2 - mdp->va_queue_first;  			mmr = uv_read_local_mmr  				(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); -			msg_res = ((msg2->sw_ack_vector << 8) | -				   msg2->sw_ack_vector); +			msg_res = msg2->sw_ack_vector;  			/*  			 * This is a message retry; clear the resources held  			 * by the previous message only if they timed out.  			 * If it has not timed out we have an unexpected  			 * situation to report.  			 */ -			if (mmr & (msg_res << 8)) { +			if (mmr & (msg_res << UV_SW_ACK_NPENDING)) {  				/*  				 * is the resource timed out?  				 * make everyone ignore the cancelled message. 
@@ -164,9 +178,9 @@ static inline void uv_bau_process_retry_msg(struct msg_desc *mdp,  				cancel_count++;  				uv_write_local_mmr(  				    UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, -					(msg_res << 8) | msg_res); -			} else -				printk(KERN_INFO "note bau retry: no effect\n"); +					(msg_res << UV_SW_ACK_NPENDING) | +					 msg_res); +			}  		}  	}  	if (!cancel_count) @@ -190,7 +204,7 @@ static void uv_bau_process_message(struct msg_desc *mdp,  	 * This must be a normal message, or retry of a normal message  	 */  	msg = mdp->msg; -	stat = &per_cpu(ptcstats, bcp->cpu); +	stat = bcp->statp;  	if (msg->address == TLB_FLUSH_ALL) {  		local_flush_tlb();  		stat->d_alltlb++; @@ -274,7 +288,7 @@ uv_do_reset(void *ptr)  	bcp = &per_cpu(bau_control, smp_processor_id());  	rap = (struct reset_args *)ptr; -	stat = &per_cpu(ptcstats, bcp->cpu); +	stat = bcp->statp;  	stat->d_resets++;  	/* @@ -302,13 +316,13 @@ uv_do_reset(void *ptr)  			 */  			mmr = uv_read_local_mmr  					(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); -			msg_res = ((msg->sw_ack_vector << 8) | -						   msg->sw_ack_vector); +			msg_res = msg->sw_ack_vector;  			if (mmr & msg_res) {  				stat->d_rcanceled++;  				uv_write_local_mmr(  				    UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, -							msg_res); +					(msg_res << UV_SW_ACK_NPENDING) | +					 msg_res);  			}  		}  	} @@ -386,17 +400,12 @@ static int uv_wait_completion(struct bau_desc *bau_desc,  	unsigned long mmr_offset, int right_shift, int this_cpu,  	struct bau_control *bcp, struct bau_control *smaster, long try)  { -	int relaxes = 0;  	unsigned long descriptor_status; -	unsigned long mmr; -	unsigned long mask;  	cycles_t ttime; -	cycles_t timeout_time; -	struct ptc_stats *stat = &per_cpu(ptcstats, this_cpu); +	struct ptc_stats *stat = bcp->statp;  	struct bau_control *hmaster;  	hmaster = bcp->uvhub_master; -	timeout_time = get_cycles() + bcp->timeout_interval;  	/* spin on the status MMR, waiting for it to go idle */  	while ((descriptor_status = (((unsigned long) @@ -423,7 +432,8 @@ static int uv_wait_completion(struct bau_desc *bau_desc,  			 * pending.  In that case hardware returns the  			 * ERROR that looks like a destination timeout.  			 */ -			if (cycles_2_us(ttime - bcp->send_message) < BIOS_TO) { +			if (cycles_2_us(ttime - bcp->send_message) < +							timeout_us) {  				bcp->conseccompletes = 0;  				return FLUSH_RETRY_PLUGGED;  			} @@ -435,26 +445,6 @@ static int uv_wait_completion(struct bau_desc *bau_desc,  			 * descriptor_status is still BUSY  			 */  			cpu_relax(); -			relaxes++; -			if (relaxes >= 10000) { -				relaxes = 0; -				if (get_cycles() > timeout_time) { -					quiesce_local_uvhub(hmaster); - -					/* single-thread the register change */ -					spin_lock(&hmaster->masks_lock); -					mmr = uv_read_local_mmr(mmr_offset); -					mask = 0UL; -					mask |= (3UL < right_shift); -					mask = ~mask; -					mmr &= mask; -					uv_write_local_mmr(mmr_offset, mmr); -					spin_unlock(&hmaster->masks_lock); -					end_uvhub_quiesce(hmaster); -					stat->s_busy++; -					return FLUSH_GIVEUP; -				} -			}  		}  	}  	bcp->conseccompletes++; @@ -494,56 +484,116 @@ static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u)  	return 1;  } +/* + * Our retries are blocked by all destination swack resources being + * in use, and a timeout is pending. In that case hardware immediately + * returns the ERROR that looks like a destination timeout. 
+ */ +static void +destination_plugged(struct bau_desc *bau_desc, struct bau_control *bcp, +			struct bau_control *hmaster, struct ptc_stats *stat) +{ +	udelay(bcp->plugged_delay); +	bcp->plugged_tries++; +	if (bcp->plugged_tries >= bcp->plugsb4reset) { +		bcp->plugged_tries = 0; +		quiesce_local_uvhub(hmaster); +		spin_lock(&hmaster->queue_lock); +		uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu); +		spin_unlock(&hmaster->queue_lock); +		end_uvhub_quiesce(hmaster); +		bcp->ipi_attempts++; +		stat->s_resets_plug++; +	} +} + +static void +destination_timeout(struct bau_desc *bau_desc, struct bau_control *bcp, +			struct bau_control *hmaster, struct ptc_stats *stat) +{ +	hmaster->max_bau_concurrent = 1; +	bcp->timeout_tries++; +	if (bcp->timeout_tries >= bcp->timeoutsb4reset) { +		bcp->timeout_tries = 0; +		quiesce_local_uvhub(hmaster); +		spin_lock(&hmaster->queue_lock); +		uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu); +		spin_unlock(&hmaster->queue_lock); +		end_uvhub_quiesce(hmaster); +		bcp->ipi_attempts++; +		stat->s_resets_timeout++; +	} +} + +/* + * Completions are taking a very long time due to a congested numalink + * network. + */ +static void +disable_for_congestion(struct bau_control *bcp, struct ptc_stats *stat) +{ +	int tcpu; +	struct bau_control *tbcp; + +	/* let only one cpu do this disabling */ +	spin_lock(&disable_lock); +	if (!baudisabled && bcp->period_requests && +	    ((bcp->period_time / bcp->period_requests) > congested_cycles)) { +		/* it becomes this cpu's job to turn on the use of the +		   BAU again */ +		baudisabled = 1; +		bcp->set_bau_off = 1; +		bcp->set_bau_on_time = get_cycles() + +			sec_2_cycles(bcp->congested_period); +		stat->s_bau_disabled++; +		for_each_present_cpu(tcpu) { +			tbcp = &per_cpu(bau_control, tcpu); +			tbcp->baudisabled = 1; +		} +	} +	spin_unlock(&disable_lock); +} +  /**   * uv_flush_send_and_wait   *   * Send a broadcast and wait for it to complete.   * - * The flush_mask contains the cpus the broadcast is to be sent to, plus + * The flush_mask contains the cpus the broadcast is to be sent to, including   * cpus that are on the local uvhub.   * - * Returns NULL if all flushing represented in the mask was done. The mask - * is zeroed. - * Returns @flush_mask if some remote flushing remains to be done. The - * mask will have some bits still set, representing any cpus on the local - * uvhub (not current cpu) and any on remote uvhubs if the broadcast failed. + * Returns 0 if all flushing represented in the mask was done. + * Returns 1 if it gives up entirely and the original cpu mask is to be + * returned to the kernel.   */ -const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc, -					     struct cpumask *flush_mask, -					     struct bau_control *bcp) +int uv_flush_send_and_wait(struct bau_desc *bau_desc, +			   struct cpumask *flush_mask, struct bau_control *bcp)  {  	int right_shift; -	int uvhub; -	int bit;  	int completion_status = 0;  	int seq_number = 0;  	long try = 0;  	int cpu = bcp->uvhub_cpu;  	int this_cpu = bcp->cpu; -	int this_uvhub = bcp->uvhub;  	unsigned long mmr_offset;  	unsigned long index;  	cycles_t time1;  	cycles_t time2; -	struct ptc_stats *stat = &per_cpu(ptcstats, bcp->cpu); +	cycles_t elapsed; +	struct ptc_stats *stat = bcp->statp;  	struct bau_control *smaster = bcp->socket_master;  	struct bau_control *hmaster = bcp->uvhub_master; -	/* -	 * Spin here while there are hmaster->max_concurrent or more active -	 * descriptors. This is the per-uvhub 'throttle'. 
-	 */  	if (!atomic_inc_unless_ge(&hmaster->uvhub_lock,  			&hmaster->active_descriptor_count, -			hmaster->max_concurrent)) { +			hmaster->max_bau_concurrent)) {  		stat->s_throttles++;  		do {  			cpu_relax();  		} while (!atomic_inc_unless_ge(&hmaster->uvhub_lock,  			&hmaster->active_descriptor_count, -			hmaster->max_concurrent)); +			hmaster->max_bau_concurrent));  	} -  	while (hmaster->uvhub_quiesce)  		cpu_relax(); @@ -557,23 +607,10 @@ const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc,  	}  	time1 = get_cycles();  	do { -		/* -		 * Every message from any given cpu gets a unique message -		 * sequence number. But retries use that same number. -		 * Our message may have timed out at the destination because -		 * all sw-ack resources are in use and there is a timeout -		 * pending there.  In that case, our last send never got -		 * placed into the queue and we need to persist until it -		 * does. -		 * -		 * Make any retry a type MSG_RETRY so that the destination will -		 * free any resource held by a previous message from this cpu. -		 */  		if (try == 0) { -			/* use message type set by the caller the first time */ +			bau_desc->header.msg_type = MSG_REGULAR;  			seq_number = bcp->message_number++;  		} else { -			/* use RETRY type on all the rest; same sequence */  			bau_desc->header.msg_type = MSG_RETRY;  			stat->s_retry_messages++;  		} @@ -581,50 +618,17 @@ const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc,  		index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) |  			bcp->uvhub_cpu;  		bcp->send_message = get_cycles(); -  		uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); -  		try++;  		completion_status = uv_wait_completion(bau_desc, mmr_offset,  			right_shift, this_cpu, bcp, smaster, try);  		if (completion_status == FLUSH_RETRY_PLUGGED) { -			/* -			 * Our retries may be blocked by all destination swack -			 * resources being consumed, and a timeout pending. In -			 * that case hardware immediately returns the ERROR -			 * that looks like a destination timeout. 
-			 */ -			udelay(TIMEOUT_DELAY); -			bcp->plugged_tries++; -			if (bcp->plugged_tries >= PLUGSB4RESET) { -				bcp->plugged_tries = 0; -				quiesce_local_uvhub(hmaster); -				spin_lock(&hmaster->queue_lock); -				uv_reset_with_ipi(&bau_desc->distribution, -							this_cpu); -				spin_unlock(&hmaster->queue_lock); -				end_uvhub_quiesce(hmaster); -				bcp->ipi_attempts++; -				stat->s_resets_plug++; -			} +			destination_plugged(bau_desc, bcp, hmaster, stat);  		} else if (completion_status == FLUSH_RETRY_TIMEOUT) { -			hmaster->max_concurrent = 1; -			bcp->timeout_tries++; -			udelay(TIMEOUT_DELAY); -			if (bcp->timeout_tries >= TIMEOUTSB4RESET) { -				bcp->timeout_tries = 0; -				quiesce_local_uvhub(hmaster); -				spin_lock(&hmaster->queue_lock); -				uv_reset_with_ipi(&bau_desc->distribution, -								this_cpu); -				spin_unlock(&hmaster->queue_lock); -				end_uvhub_quiesce(hmaster); -				bcp->ipi_attempts++; -				stat->s_resets_timeout++; -			} +			destination_timeout(bau_desc, bcp, hmaster, stat);  		} -		if (bcp->ipi_attempts >= 3) { +		if (bcp->ipi_attempts >= bcp->ipi_reset_limit) {  			bcp->ipi_attempts = 0;  			completion_status = FLUSH_GIVEUP;  			break; @@ -633,49 +637,36 @@ const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc,  	} while ((completion_status == FLUSH_RETRY_PLUGGED) ||  		 (completion_status == FLUSH_RETRY_TIMEOUT));  	time2 = get_cycles(); - -	if ((completion_status == FLUSH_COMPLETE) && (bcp->conseccompletes > 5) -	    && (hmaster->max_concurrent < hmaster->max_concurrent_constant)) -			hmaster->max_concurrent++; - -	/* -	 * hold any cpu not timing out here; no other cpu currently held by -	 * the 'throttle' should enter the activation code -	 */ +	bcp->plugged_tries = 0; +	bcp->timeout_tries = 0; +	if ((completion_status == FLUSH_COMPLETE) && +	    (bcp->conseccompletes > bcp->complete_threshold) && +	    (hmaster->max_bau_concurrent < +					hmaster->max_bau_concurrent_constant)) +			hmaster->max_bau_concurrent++;  	while (hmaster->uvhub_quiesce)  		cpu_relax();  	atomic_dec(&hmaster->active_descriptor_count); - -	/* guard against cycles wrap */ -	if (time2 > time1) -		stat->s_time += (time2 - time1); -	else -		stat->s_requestor--; /* don't count this one */ +	if (time2 > time1) { +		elapsed = time2 - time1; +		stat->s_time += elapsed; +		if ((completion_status == FLUSH_COMPLETE) && (try == 1)) { +			bcp->period_requests++; +			bcp->period_time += elapsed; +			if ((elapsed > congested_cycles) && +			    (bcp->period_requests > bcp->congested_reps)) { +				disable_for_congestion(bcp, stat); +			} +		} +	} else +		stat->s_requestor--;  	if (completion_status == FLUSH_COMPLETE && try > 1)  		stat->s_retriesok++;  	else if (completion_status == FLUSH_GIVEUP) { -		/* -		 * Cause the caller to do an IPI-style TLB shootdown on -		 * the target cpu's, all of which are still in the mask. -		 */  		stat->s_giveup++; -		return flush_mask; -	} - -	/* -	 * Success, so clear the remote cpu's from the mask so we don't -	 * use the IPI method of shootdown on them. 
-	 */ -	for_each_cpu(bit, flush_mask) { -		uvhub = uv_cpu_to_blade_id(bit); -		if (uvhub == this_uvhub) -			continue; -		cpumask_clear_cpu(bit, flush_mask); +		return 1;  	} -	if (!cpumask_empty(flush_mask)) -		return flush_mask; - -	return NULL; +	return 0;  }  /** @@ -707,70 +698,89 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,  					  struct mm_struct *mm,  					  unsigned long va, unsigned int cpu)  { -	int remotes;  	int tcpu;  	int uvhub;  	int locals = 0; +	int remotes = 0; +	int hubs = 0;  	struct bau_desc *bau_desc;  	struct cpumask *flush_mask;  	struct ptc_stats *stat;  	struct bau_control *bcp; +	struct bau_control *tbcp; +	/* kernel was booted 'nobau' */  	if (nobau)  		return cpumask;  	bcp = &per_cpu(bau_control, cpu); +	stat = bcp->statp; + +	/* bau was disabled due to slow response */ +	if (bcp->baudisabled) { +		/* the cpu that disabled it must re-enable it */ +		if (bcp->set_bau_off) { +			if (get_cycles() >= bcp->set_bau_on_time) { +				stat->s_bau_reenabled++; +				baudisabled = 0; +				for_each_present_cpu(tcpu) { +					tbcp = &per_cpu(bau_control, tcpu); +					tbcp->baudisabled = 0; +					tbcp->period_requests = 0; +					tbcp->period_time = 0; +				} +			} +		} +		return cpumask; +	} +  	/*  	 * Each sending cpu has a per-cpu mask which it fills from the caller's -	 * cpu mask.  Only remote cpus are converted to uvhubs and copied. +	 * cpu mask.  All cpus are converted to uvhubs and copied to the +	 * activation descriptor.  	 */  	flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu); -	/* -	 * copy cpumask to flush_mask, removing current cpu -	 * (current cpu should already have been flushed by the caller and -	 *  should never be returned if we return flush_mask) -	 */ +	/* don't actually do a shootdown of the local cpu */  	cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));  	if (cpu_isset(cpu, *cpumask)) -		locals++;  /* current cpu was targeted */ +		stat->s_ntargself++;  	bau_desc = bcp->descriptor_base;  	bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu; -  	bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); -	remotes = 0; + +	/* cpu statistics */  	for_each_cpu(tcpu, flush_mask) {  		uvhub = uv_cpu_to_blade_id(tcpu); -		if (uvhub == bcp->uvhub) { -			locals++; -			continue; -		}  		bau_uvhub_set(uvhub, &bau_desc->distribution); -		remotes++; -	} -	if (remotes == 0) { -		/* -		 * No off_hub flushing; return status for local hub. -		 * Return the caller's mask if all were local (the current -		 * cpu may be in that mask). 
-		 */ -		if (locals) -			return cpumask; +		if (uvhub == bcp->uvhub) +			locals++;  		else -			return NULL; +			remotes++;  	} -	stat = &per_cpu(ptcstats, cpu); +	if ((locals + remotes) == 0) +		return NULL;  	stat->s_requestor++; -	stat->s_ntargcpu += remotes; +	stat->s_ntargcpu += remotes + locals; +	stat->s_ntargremotes += remotes; +	stat->s_ntarglocals += locals;  	remotes = bau_uvhub_weight(&bau_desc->distribution); -	stat->s_ntarguvhub += remotes; -	if (remotes >= 16) + +	/* uvhub statistics */ +	hubs = bau_uvhub_weight(&bau_desc->distribution); +	if (locals) { +		stat->s_ntarglocaluvhub++; +		stat->s_ntargremoteuvhub += (hubs - 1); +	} else +		stat->s_ntargremoteuvhub += hubs; +	stat->s_ntarguvhub += hubs; +	if (hubs >= 16)  		stat->s_ntarguvhub16++; -	else if (remotes >= 8) +	else if (hubs >= 8)  		stat->s_ntarguvhub8++; -	else if (remotes >= 4) +	else if (hubs >= 4)  		stat->s_ntarguvhub4++; -	else if (remotes >= 2) +	else if (hubs >= 2)  		stat->s_ntarguvhub2++;  	else  		stat->s_ntarguvhub1++; @@ -779,10 +789,13 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,  	bau_desc->payload.sending_cpu = cpu;  	/* -	 * uv_flush_send_and_wait returns null if all cpu's were messaged, or -	 * the adjusted flush_mask if any cpu's were not messaged. +	 * uv_flush_send_and_wait returns 0 if all cpu's were messaged, +	 * or 1 if it gave up and the original cpumask should be returned.  	 */ -	return uv_flush_send_and_wait(bau_desc, flush_mask, bcp); +	if (!uv_flush_send_and_wait(bau_desc, flush_mask, bcp)) +		return NULL; +	else +		return cpumask;  }  /* @@ -810,7 +823,7 @@ void uv_bau_message_interrupt(struct pt_regs *regs)  	time_start = get_cycles();  	bcp = &per_cpu(bau_control, smp_processor_id()); -	stat = &per_cpu(ptcstats, smp_processor_id()); +	stat = bcp->statp;  	msgdesc.va_queue_first = bcp->va_queue_first;  	msgdesc.va_queue_last = bcp->va_queue_last;  	msg = bcp->bau_msg_head; @@ -908,12 +921,12 @@ static void uv_ptc_seq_stop(struct seq_file *file, void *data)  }  static inline unsigned long long -millisec_2_cycles(unsigned long millisec) +microsec_2_cycles(unsigned long microsec)  {  	unsigned long ns;  	unsigned long long cyc; -	ns = millisec * 1000; +	ns = microsec * 1000;  	cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id()));  	return cyc;  } @@ -931,15 +944,19 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data)  	if (!cpu) {  		seq_printf(file, -			"# cpu sent stime numuvhubs numuvhubs16 numuvhubs8 "); +			"# cpu sent stime self locals remotes ncpus localhub "); +		seq_printf(file, +			"remotehub numuvhubs numuvhubs16 numuvhubs8 ");  		seq_printf(file, -			"numuvhubs4 numuvhubs2 numuvhubs1 numcpus dto "); +			"numuvhubs4 numuvhubs2 numuvhubs1 dto ");  		seq_printf(file,  			"retries rok resetp resett giveup sto bz throt ");  		seq_printf(file,  			"sw_ack recv rtime all ");  		seq_printf(file, -			"one mult none retry canc nocan reset rcan\n"); +			"one mult none retry canc nocan reset rcan "); +		seq_printf(file, +			"disable enable\n");  	}  	if (cpu < num_possible_cpus() && cpu_online(cpu)) {  		stat = &per_cpu(ptcstats, cpu); @@ -947,18 +964,23 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data)  		seq_printf(file,  			"cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",  			   cpu, stat->s_requestor, cycles_2_us(stat->s_time), -			   stat->s_ntarguvhub, stat->s_ntarguvhub16, +			   stat->s_ntargself, stat->s_ntarglocals, +			   stat->s_ntargremotes, stat->s_ntargcpu, +			   stat->s_ntarglocaluvhub, 
stat->s_ntargremoteuvhub, +			   stat->s_ntarguvhub, stat->s_ntarguvhub16); +		seq_printf(file, "%ld %ld %ld %ld %ld ",  			   stat->s_ntarguvhub8, stat->s_ntarguvhub4,  			   stat->s_ntarguvhub2, stat->s_ntarguvhub1, -			   stat->s_ntargcpu, stat->s_dtimeout); +			   stat->s_dtimeout);  		seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ",  			   stat->s_retry_messages, stat->s_retriesok,  			   stat->s_resets_plug, stat->s_resets_timeout,  			   stat->s_giveup, stat->s_stimeout,  			   stat->s_busy, stat->s_throttles); +  		/* destination side statistics */  		seq_printf(file, -			   "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", +			   "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",  			   uv_read_global_mmr64(uv_cpu_to_pnode(cpu),  					UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE),  			   stat->d_requestee, cycles_2_us(stat->d_time), @@ -966,15 +988,36 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data)  			   stat->d_nomsg, stat->d_retries, stat->d_canceled,  			   stat->d_nocanceled, stat->d_resets,  			   stat->d_rcanceled); +		seq_printf(file, "%ld %ld\n", +			stat->s_bau_disabled, stat->s_bau_reenabled);  	}  	return 0;  }  /* + * Display the tunables through debugfs + */ +static ssize_t tunables_read(struct file *file, char __user *userbuf, +						size_t count, loff_t *ppos) +{ +	char buf[300]; +	int ret; + +	ret = snprintf(buf, 300, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n", +		"max_bau_concurrent plugged_delay plugsb4reset", +		"timeoutsb4reset ipi_reset_limit complete_threshold", +		"congested_response_us congested_reps congested_period", +		max_bau_concurrent, plugged_delay, plugsb4reset, +		timeoutsb4reset, ipi_reset_limit, complete_threshold, +		congested_response_us, congested_reps, congested_period); + +	return simple_read_from_buffer(userbuf, count, ppos, buf, ret); +} + +/*   * -1: reset the statistics   *  0: display meaning of the statistics - * >0: maximum concurrent active descriptors per uvhub (throttle)   */  static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user,  				 size_t count, loff_t *data) @@ -983,7 +1026,6 @@ static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user,  	long input_arg;  	char optstr[64];  	struct ptc_stats *stat; -	struct bau_control *bcp;  	if (count == 0 || count > sizeof(optstr))  		return -EINVAL; @@ -1059,29 +1101,158 @@ static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user,  		"reset:    number of ipi-style reset requests processed\n");  		printk(KERN_DEBUG  		"rcan:     number messages canceled by reset requests\n"); +		printk(KERN_DEBUG +		"disable:  number times use of the BAU was disabled\n"); +		printk(KERN_DEBUG +		"enable:   number times use of the BAU was re-enabled\n");  	} else if (input_arg == -1) {  		for_each_present_cpu(cpu) {  			stat = &per_cpu(ptcstats, cpu);  			memset(stat, 0, sizeof(struct ptc_stats));  		} -	} else { -		uv_bau_max_concurrent = input_arg; -		bcp = &per_cpu(bau_control, smp_processor_id()); -		if (uv_bau_max_concurrent < 1 || -		    uv_bau_max_concurrent > bcp->cpus_in_uvhub) { -			printk(KERN_DEBUG -				"Error: BAU max concurrent %d; %d is invalid\n", -				bcp->max_concurrent, uv_bau_max_concurrent); -			return -EINVAL; -		} -		printk(KERN_DEBUG "Set BAU max concurrent:%d\n", -		       uv_bau_max_concurrent); -		for_each_present_cpu(cpu) { -			bcp = &per_cpu(bau_control, cpu); -			bcp->max_concurrent = uv_bau_max_concurrent; +	} + +	return count; +} +
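+/* local_atoi: parse a non-negative decimal number, stopping at the first non-digit */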
+static int local_atoi(const char *name) +{ +	int val = 0; + +	for (;; name++) { +		switch (*name) { +		case '0' ... '9': +			val = 10*val+(*name-'0'); +			break; +		default: +			return val;  		}  	} +} + +/* + * set the tunables; + * a value of 0 resets them to their defaults + */ +static ssize_t tunables_write(struct file *file, const char __user *user, +				 size_t count, loff_t *data) +{ +	int cpu; +	int cnt = 0; +	int val; +	char *p; +	char *q; +	char instr[64]; +	struct bau_control *bcp; + +	if (count == 0 || count > sizeof(instr)-1) +		return -EINVAL; +	if (copy_from_user(instr, user, count)) +		return -EFAULT; +	instr[count] = '\0'; +	/* count the fields */ +	p = instr + strspn(instr, WHITESPACE); +	q = p; +	for (; *p; p = q + strspn(q, WHITESPACE)) { +		q = p + strcspn(p, WHITESPACE); +		cnt++; +		if (q == p) +			break; +	} +	if (cnt != 9) { +		printk(KERN_INFO "bau tunable error: should be 9 numbers\n"); +		return -EINVAL; +	} + +	p = instr + strspn(instr, WHITESPACE); +	q = p; +	for (cnt = 0; *p; p = q + strspn(q, WHITESPACE), cnt++) { +		q = p + strcspn(p, WHITESPACE); +		val = local_atoi(p); +		switch (cnt) { +		case 0: +			if (val == 0) { +				max_bau_concurrent = MAX_BAU_CONCURRENT; +				max_bau_concurrent_constant = +							MAX_BAU_CONCURRENT; +				continue; +			} +			bcp = &per_cpu(bau_control, smp_processor_id()); +			if (val < 1 || val > bcp->cpus_in_uvhub) { +				printk(KERN_DEBUG +				"Error: BAU max concurrent %d is invalid\n", +				val); +				return -EINVAL; +			} +			max_bau_concurrent = val; +			max_bau_concurrent_constant = val; +			continue; +		case 1: +			if (val == 0) +				plugged_delay = PLUGGED_DELAY; +			else +				plugged_delay = val; +			continue; +		case 2: +			if (val == 0) +				plugsb4reset = PLUGSB4RESET; +			else +				plugsb4reset = val; +			continue; +		case 3: +			if (val == 0) +				timeoutsb4reset = TIMEOUTSB4RESET; +			else +				timeoutsb4reset = val; +			continue; +		case 4: +			if (val == 0) +				ipi_reset_limit = IPI_RESET_LIMIT; +			else +				ipi_reset_limit = val; +			continue; +		case 5: +			if (val == 0) +				complete_threshold = COMPLETE_THRESHOLD; +			else +				complete_threshold = val; +			continue; +		case 6: +			if (val == 0) +				congested_response_us = CONGESTED_RESPONSE_US; +			else +				congested_response_us = val; +			continue; +		case 7: +			if (val == 0) +				congested_reps = CONGESTED_REPS; +			else +				congested_reps = val; +			continue; +		case 8: +			if (val == 0) +				congested_period = CONGESTED_PERIOD; +			else +				congested_period = val; +			continue; +		} +		if (q == p) +			break; +	} +	for_each_present_cpu(cpu) { +		bcp = &per_cpu(bau_control, cpu); +		bcp->max_bau_concurrent = max_bau_concurrent; +		bcp->max_bau_concurrent_constant = max_bau_concurrent; +		bcp->plugged_delay = plugged_delay; +		bcp->plugsb4reset = plugsb4reset; +		bcp->timeoutsb4reset = timeoutsb4reset; +		bcp->ipi_reset_limit = ipi_reset_limit; +		bcp->complete_threshold = complete_threshold; +		bcp->congested_response_us = congested_response_us; +		bcp->congested_reps = congested_reps; +		bcp->congested_period = congested_period; +	}  	return count;  } @@ -1097,6 +1268,11 @@ static int uv_ptc_proc_open(struct inode *inode, struct file *file)  	return seq_open(file, &uv_ptc_seq_ops);  } +static int tunables_open(struct inode *inode, struct file *file) +{ +	return 0; +} +  static const struct file_operations proc_uv_ptc_operations = {  	.open		= uv_ptc_proc_open,  	.read		= seq_read, @@ -1105,6 +1281,12 @@ static const struct file_operations proc_uv_ptc_operations = {  	.release	= seq_release,  }; +static const struct file_operations tunables_fops = { +	.open		= tunables_open, +	.read		= tunables_read, +	.write		= tunables_write, +}; +  static int __init uv_ptc_init(void)  {  	struct proc_dir_entry *proc_uv_ptc; @@ -1119,6 +1301,20 @@ static int __init uv_ptc_init(void)  		       UV_PTC_BASENAME);  		return -EINVAL;  	} + +	tunables_dir = debugfs_create_dir(UV_BAU_TUNABLES_DIR, NULL); +	if (!tunables_dir) { +		printk(KERN_ERR "unable to create debugfs directory %s\n", +		       UV_BAU_TUNABLES_DIR); +		return -EINVAL; +	} +	tunables_file = debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600, +			tunables_dir, NULL, &tunables_fops); +	if (!tunables_file) { +		printk(KERN_ERR "unable to create debugfs file %s\n", +		       UV_BAU_TUNABLES_FILE); +		return -EINVAL; +	}  	return 0;  } @@ -1259,15 +1455,45 @@ static void __init uv_init_uvhub(int uvhub, int vector)  }  /* + * We will set BAU_MISC_CONTROL with a timeout period. + * But the BIOS has set UVH_AGING_PRESCALE_SEL and UVH_TRANSACTION_TIMEOUT. + * So the destination timeout period has to be calculated from them. + */
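+/* + * Worked example with hypothetical register contents: if urgency7 selects + * index 2, the base is 1280 nsec; with mult1 = 10 and mult2 = 16 that gives + * 1280 * 10 * 16 = 204800 nsec, so timeout_us comes out as 204. + */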
+static int +calculate_destination_timeout(void) +{ +	unsigned long mmr_image; +	int mult1; +	int mult2; +	int index; +	int base; +	int ret; +	unsigned long ts_ns; + +	mult1 = UV_INTD_SOFT_ACK_TIMEOUT_PERIOD & BAU_MISC_CONTROL_MULT_MASK; +	mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL); +	index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK; +	mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT); +	mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK; +	base = timeout_base_ns[index]; +	ts_ns = base * mult1 * mult2; +	ret = ts_ns / 1000; +	return ret; +} + +/*   * initialize the bau_control structure for each cpu   */ -static void uv_init_per_cpu(int nuvhubs) +static void __init uv_init_per_cpu(int nuvhubs)  { -	int i, j, k; +	int i;  	int cpu;  	int pnode;  	int uvhub; +	int have_hmaster;  	short socket = 0; +	unsigned short socket_mask; +	unsigned char *uvhub_mask;  	struct bau_control *bcp;  	struct uvhub_desc *bdp;  	struct socket_desc *sdp; @@ -1278,7 +1504,7 @@ static void uv_init_per_cpu(int nuvhubs)  		short cpu_number[16];  	};  	struct uvhub_desc { -		short num_sockets; +		unsigned short socket_mask;  		short num_cpus;  		short uvhub;  		short pnode; @@ -1286,57 +1512,84 @@ static void uv_init_per_cpu(int nuvhubs)  	};  	struct uvhub_desc *uvhub_descs; +	timeout_us = calculate_destination_timeout(); +  	uvhub_descs = (struct uvhub_desc *)  		kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL);  	memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc)); +	uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL);  	for_each_present_cpu(cpu) {  		bcp = &per_cpu(bau_control, cpu);  		memset(bcp, 0, sizeof(struct bau_control)); -		spin_lock_init(&bcp->masks_lock); -		bcp->max_concurrent = uv_bau_max_concurrent;  		pnode = uv_cpu_hub_info(cpu)->pnode;  		uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; +		*(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8));  		bdp = &uvhub_descs[uvhub];  		bdp->num_cpus++;  		bdp->uvhub = uvhub;  		bdp->pnode = pnode; -		/* time interval to catch a hardware stay-busy bug */ -		bcp->timeout_interval = millisec_2_cycles(3); -		/* kludge: assume uv_hub.h is constant */ -		socket = (cpu_physical_id(cpu)>>5)&1; -		if (socket >= bdp->num_sockets) -			bdp->num_sockets = socket+1; +		/* kludge: 'assuming' one node per socket, and assuming that +		   disabling a socket just leaves a gap in node numbers */ +		socket = (cpu_to_node(cpu) & 1); +		bdp->socket_mask |= (1 << socket); +		sdp = 
&bdp->socket[socket];  		sdp->cpu_number[sdp->num_cpus] = cpu;  		sdp->num_cpus++;  	} -	socket = 0; -	for_each_possible_blade(uvhub) { +	for (uvhub = 0; uvhub < nuvhubs; uvhub++) { +		if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8)))) +			continue; +		have_hmaster = 0;  		bdp = &uvhub_descs[uvhub]; -		for (i = 0; i < bdp->num_sockets; i++) { -			sdp = &bdp->socket[i]; -			for (j = 0; j < sdp->num_cpus; j++) { -				cpu = sdp->cpu_number[j]; +		socket_mask = bdp->socket_mask; +		socket = 0; +		while (socket_mask) { +			if (!(socket_mask & 1)) +				goto nextsocket; +			sdp = &bdp->socket[socket]; +			for (i = 0; i < sdp->num_cpus; i++) { +				cpu = sdp->cpu_number[i];  				bcp = &per_cpu(bau_control, cpu);  				bcp->cpu = cpu; -				if (j == 0) { +				if (i == 0) {  					smaster = bcp; -					if (i == 0) +					if (!have_hmaster) { +						have_hmaster++;  						hmaster = bcp; +					}  				}  				bcp->cpus_in_uvhub = bdp->num_cpus;  				bcp->cpus_in_socket = sdp->num_cpus;  				bcp->socket_master = smaster; +				bcp->uvhub = bdp->uvhub;  				bcp->uvhub_master = hmaster; -				for (k = 0; k < DEST_Q_SIZE; k++) -					bcp->socket_acknowledge_count[k] = 0; -				bcp->uvhub_cpu = -				  uv_cpu_hub_info(cpu)->blade_processor_id; +				bcp->uvhub_cpu = uv_cpu_hub_info(cpu)-> +						blade_processor_id;  			} +nextsocket:  			socket++; +			socket_mask = (socket_mask >> 1);  		}  	}  	kfree(uvhub_descs); +	kfree(uvhub_mask); +	for_each_present_cpu(cpu) { +		bcp = &per_cpu(bau_control, cpu); +		bcp->baudisabled = 0; +		bcp->statp = &per_cpu(ptcstats, cpu); +		/* time interval to catch a hardware stay-busy bug */ +		bcp->timeout_interval = microsec_2_cycles(2*timeout_us); +		bcp->max_bau_concurrent = max_bau_concurrent; +		bcp->max_bau_concurrent_constant = max_bau_concurrent; +		bcp->plugged_delay = plugged_delay; +		bcp->plugsb4reset = plugsb4reset; +		bcp->timeoutsb4reset = timeoutsb4reset; +		bcp->ipi_reset_limit = ipi_reset_limit; +		bcp->complete_threshold = complete_threshold; +		bcp->congested_response_us = congested_response_us; +		bcp->congested_reps = congested_reps; +		bcp->congested_period = congested_period; +	}  }  /* @@ -1361,10 +1614,11 @@ static int __init uv_bau_init(void)  		zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu),  				       GFP_KERNEL, cpu_to_node(cur_cpu)); -	uv_bau_max_concurrent = MAX_BAU_CONCURRENT;  	uv_nshift = uv_hub_info->m_val;  	uv_mmask = (1UL << uv_hub_info->m_val) - 1;  	nuvhubs = uv_num_possible_blades(); +	spin_lock_init(&disable_lock); +	congested_cycles = microsec_2_cycles(congested_response_us);  	uv_init_per_cpu(nuvhubs); @@ -1383,15 +1637,19 @@ static int __init uv_bau_init(void)  	alloc_intr_gate(vector, uv_bau_message_intr1);  	for_each_possible_blade(uvhub) { -		pnode = uv_blade_to_pnode(uvhub); -		/* INIT the bau */ -		uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_ACTIVATION_CONTROL, -				      ((unsigned long)1 << 63)); -		mmr = 1; /* should be 1 to broadcast to both sockets */ -		uv_write_global_mmr64(pnode, UVH_BAU_DATA_BROADCAST, mmr); +		if (uv_blade_nr_possible_cpus(uvhub)) { +			pnode = uv_blade_to_pnode(uvhub); +			/* INIT the bau */ +			uv_write_global_mmr64(pnode, +					UVH_LB_BAU_SB_ACTIVATION_CONTROL, +					((unsigned long)1 << 63)); +			mmr = 1; /* should be 1 to broadcast to both sockets */ +			uv_write_global_mmr64(pnode, UVH_BAU_DATA_BROADCAST, +						mmr); +		}  	}  	return 0;  }  core_initcall(uv_bau_init); -core_initcall(uv_ptc_init); +fs_initcall(uv_ptc_init); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c 
index 725ef4d17cd5..60788dee0f8a 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -392,7 +392,13 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)  		if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)  								== NOTIFY_STOP)  			return; +  #ifdef CONFIG_X86_LOCAL_APIC +		if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) +							== NOTIFY_STOP) +			return; + +#ifndef CONFIG_LOCKUP_DETECTOR  		/*  		 * Ok, so this is none of the documented NMI sources,  		 * so it must be the NMI watchdog. @@ -400,6 +406,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)  		if (nmi_watchdog_tick(regs, reason))  			return;  		if (!do_nmi_callback(regs, cpu)) +#endif /* !CONFIG_LOCKUP_DETECTOR */  			unknown_nmi_error(reason, regs);  #else  		unknown_nmi_error(reason, regs); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 9faf91ae1841..ce8e50239332 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -751,7 +751,6 @@ static struct clocksource clocksource_tsc = {  	.read                   = read_tsc,  	.resume			= resume_tsc,  	.mask                   = CLOCKSOURCE_MASK(64), -	.shift                  = 22,  	.flags                  = CLOCK_SOURCE_IS_CONTINUOUS |  				  CLOCK_SOURCE_MUST_VERIFY,  #ifdef CONFIG_X86_64 @@ -845,8 +844,6 @@ __cpuinit int unsynchronized_tsc(void)  static void __init init_tsc_clocksource(void)  { -	clocksource_tsc.mult = clocksource_khz2mult(tsc_khz, -			clocksource_tsc.shift);  	if (tsc_clocksource_reliable)  		clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;  	/* lower the rating if we already know its unstable: */ @@ -854,7 +851,7 @@ static void __init init_tsc_clocksource(void)  		clocksource_tsc.rating = 0;  		clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;  	} -	clocksource_register(&clocksource_tsc); +	clocksource_register_khz(&clocksource_tsc, tsc_khz);  }  #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/verify_cpu_64.S b/arch/x86/kernel/verify_cpu_64.S index 45b6f8a975a1..56a8c2a867d9 100644 --- a/arch/x86/kernel/verify_cpu_64.S +++ b/arch/x86/kernel/verify_cpu_64.S @@ -31,6 +31,7 @@   */  #include <asm/cpufeature.h> +#include <asm/msr-index.h>  verify_cpu:  	pushfl				# Save caller passed flags @@ -88,7 +89,7 @@ verify_cpu_sse_test:  	je	verify_cpu_sse_ok  	test	%di,%di  	jz	verify_cpu_no_longmode	# only try to force SSE on AMD -	movl	$0xc0010015,%ecx	# HWCR +	movl	$MSR_K7_HWCR,%ecx  	rdmsr  	btr	$15,%eax		# enable SSE  	wrmsr diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 1c0c6ab9c60f..dcbb28c4b694 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -73,8 +73,8 @@ void update_vsyscall_tz(void)  	write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);  } -void update_vsyscall(struct timespec *wall_time, struct clocksource *clock, -		     u32 mult) +void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, +			struct clocksource *clock, u32 mult)  {  	unsigned long flags; @@ -87,7 +87,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,  	vsyscall_gtod_data.clock.shift = clock->shift;  	vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;  	vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; -	vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic; +	vsyscall_gtod_data.wall_to_monotonic = *wtm;  	vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();  	write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);  } @@ -169,13 
+169,18 @@ int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)   * unlikely */  time_t __vsyscall(1) vtime(time_t *t)  { -	struct timeval tv; +	unsigned seq;  	time_t result;  	if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))  		return time_syscall(t); -	vgettimeofday(&tv, NULL); -	result = tv.tv_sec; +	do { +		seq = read_seqbegin(&__vsyscall_gtod_data.lock); + +		result = __vsyscall_gtod_data.wall_time_sec; + +	} while (read_seqretry(&__vsyscall_gtod_data.lock, seq)); +  	if (t)  		*t = result;  	return result; diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 37e68fc5e24a..9c253bd65e24 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -16,11 +16,88 @@   */  u64 pcntxt_mask; +/* + * Represents init state for the supported extended state. + */ +static struct xsave_struct *init_xstate_buf; +  struct _fpx_sw_bytes fx_sw_reserved;  #ifdef CONFIG_IA32_EMULATION  struct _fpx_sw_bytes fx_sw_reserved_ia32;  #endif +static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; + +/* + * If a processor implementation discerns that a processor state component is + * in its initialized state, it may set the corresponding bit in + * xsave_hdr.xstate_bv to '0', without modifying the corresponding memory + * layout, in the case of xsaveopt. While presenting the xstate information to + * the user, we always ensure that the memory layout of a feature will be in + * the init state if the corresponding header bit is zero. This is to ensure + * that the user doesn't see some stale state in the memory layout during + * signal handling, debugging etc. + */ +void __sanitize_i387_state(struct task_struct *tsk) +{ +	u64 xstate_bv; +	int feature_bit = 0x2; +	struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; + +	if (!fx) +		return; +
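+	/* +	 * The caller must already have saved this task's extended state to +	 * memory; a task still flagged TS_USEDFPU has live register state +	 * that would race with the sanitizing below. +	 */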
@@ -52,19 +128,21 @@ int check_for_xstate(struct i387_fxsave_struct __user *buf,  	if (fx_sw_user->xstate_size < min_xstate_size ||  	    fx_sw_user->xstate_size > xstate_size ||  	    fx_sw_user->xstate_size > fx_sw_user->extended_size) -		return -1; +		return -EINVAL;  	err = __get_user(magic2, (__u32 *) (((void *)fpstate) +  					    fx_sw_user->extended_size -  					    FP_XSTATE_MAGIC2_SIZE)); +	if (err) +		return err;  	/*  	 * Check for the presence of second magic word at the end of memory  	 * layout. This detects the case where the user just copied the legacy  	 * fpstate layout without copying the extended state information  	 * in the memory layout.  	 */ -	if (err || magic2 != FP_XSTATE_MAGIC2) -		return -1; +	if (magic2 != FP_XSTATE_MAGIC2) +		return -EFAULT;  	return 0;  } @@ -91,14 +169,6 @@ int save_i387_xstate(void __user *buf)  		return 0;  	if (task_thread_info(tsk)->status & TS_USEDFPU) { -		/* -	 	 * Start with clearing the user buffer. This will present a -	 	 * clean context for the bytes not touched by the fxsave/xsave. -		 */ -		err = __clear_user(buf, sig_xstate_size); -		if (err) -			return err; -  		if (use_xsave())  			err = xsave_user(buf);  		else @@ -109,6 +179,7 @@ int save_i387_xstate(void __user *buf)  		task_thread_info(tsk)->status &= ~TS_USEDFPU;  		stts();  	} else { +		sanitize_i387_state(tsk);  		if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave,  				   xstate_size))  			return -1; @@ -184,8 +255,8 @@ static int restore_user_xstate(void __user *buf)  	 * init the state skipped by the user.  	 */  	mask = pcntxt_mask & ~mask; - -	xrstor_state(init_xstate_buf, mask); +	if (unlikely(mask)) +		xrstor_state(init_xstate_buf, mask);  	return 0; @@ -274,11 +345,6 @@ static void prepare_fx_sw_frame(void)  #endif  } -/* - * Represents init state for the supported extended state. - */ -struct xsave_struct *init_xstate_buf; -  #ifdef CONFIG_X86_64  unsigned int sig_xstate_size = sizeof(struct _fpstate);  #endif @@ -286,37 +352,77 @@ unsigned int sig_xstate_size = sizeof(struct _fpstate);  /*   * Enable the extended processor state save/restore feature   */ -void __cpuinit xsave_init(void) +static inline void xstate_enable(void)  { -	if (!cpu_has_xsave) -		return; -  	set_in_cr4(X86_CR4_OSXSAVE); - -	/* -	 * Enable all the features that the HW is capable of -	 * and the Linux kernel is aware of. -	 */  	xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);  }  /* + * Record the offsets and sizes of different state managed by the xsave + * memory layout. + */ +static void __init setup_xstate_features(void) +{ +	int eax, ebx, ecx, edx, leaf = 0x2; + +	xstate_features = fls64(pcntxt_mask); +	xstate_offsets = alloc_bootmem(xstate_features * sizeof(int)); +	xstate_sizes = alloc_bootmem(xstate_features * sizeof(int)); + +	do { +		cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx); + +		if (eax == 0) +			break; + +		xstate_offsets[leaf] = ebx; +		xstate_sizes[leaf] = eax; + +		leaf++; +	} while (1); +} + +/*   * setup the xstate image representing the init state   */  static void __init setup_xstate_init(void)  { +	setup_xstate_features(); + +	/* +	 * Setup init_xstate_buf to represent the init state of +	 * all the features managed by the xsave +	 */  	init_xstate_buf = alloc_bootmem(xstate_size);  	init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; + +	clts(); +	/* +	 * Init all the features state with header_bv being 0x0 +	 */ +	xrstor_state(init_xstate_buf, -1); +	/* +	 * Dump the init state again. 
This is to identify the init state +	 * of any feature which is not represented by all zeros. +	 */ +	xsave_state(init_xstate_buf, -1); +	stts();  }  /*   * Enable and initialize the xsave feature.   */ -void __ref xsave_cntxt_init(void) +static void __init xstate_enable_boot_cpu(void)  {  	unsigned int eax, ebx, ecx, edx; -	cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); +	if (boot_cpu_data.cpuid_level < XSTATE_CPUID) { +		WARN(1, KERN_ERR "XSTATE_CPUID missing\n"); +		return; +	} + +	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);  	pcntxt_mask = eax + ((u64)edx << 32);  	if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { @@ -329,12 +435,13 @@ void __ref xsave_cntxt_init(void)  	 * Support only the state known to OS.  	 */  	pcntxt_mask = pcntxt_mask & XCNTXT_MASK; -	xsave_init(); + +	xstate_enable();  	/*  	 * Recompute the context size for enabled features  	 */ -	cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); +	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);  	xstate_size = ebx;  	update_regset_xstate_info(xstate_size, pcntxt_mask); @@ -346,3 +453,23 @@ void __ref xsave_cntxt_init(void)  	       "cntxt size 0x%x\n",  	       pcntxt_mask, xstate_size);  } + +/* + * For the very first instance, this calls xstate_enable_boot_cpu(); + * for all subsequent instances, this calls xstate_enable(). + * + * This is somewhat obfuscated due to the lack of powerful enough + * overrides for the section checks. + */ +void __cpuinit xsave_init(void) +{ +	static __refdata void (*next_func)(void) = xstate_enable_boot_cpu; +	void (*this_func)(void); + +	if (!cpu_has_xsave) +		return; + +	this_func = next_func; +	next_func = xstate_enable; +	this_func(); +} diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 5ac0bb465ed6..b38bd8b92aa6 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -9,6 +9,7 @@   * privileged instructions:   *   * Copyright (C) 2006 Qumranet + * Copyright 2010 Red Hat, Inc. and/or its affiliates.   *   *   Avi Kivity <avi@qumranet.com>   *   Yaniv Kamay <yaniv@qumranet.com> @@ -67,6 +68,9 @@  #define SrcImmUByte (8<<4)      /* 8-bit unsigned immediate operand. */  #define SrcImmU     (9<<4)      /* Immediate operand, unsigned */  #define SrcSI       (0xa<<4)	/* Source is in the DS:RSI */ +#define SrcImmFAddr (0xb<<4)	/* Source is immediate far address */ +#define SrcMemFAddr (0xc<<4)	/* Source is far address in memory */ +#define SrcAcc      (0xd<<4)	/* Source Accumulator */  #define SrcMask     (0xf<<4)  /* Generic ModRM decode. */  #define ModRM       (1<<8) @@ -88,10 +92,6 @@  #define Src2CL      (1<<29)  #define Src2ImmByte (2<<29)  #define Src2One     (3<<29) -#define Src2Imm16   (4<<29) -#define Src2Mem16   (5<<29) /* Used for Ep encoding. First argument has to be -			       in memory and second argument is located -			       immediately after the first one in memory. 
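The Src2Imm16/Src2Mem16 flags deleted above existed only to describe a far-pointer operand in two pieces; the new SrcImmFAddr/SrcMemFAddr flags treat the ptr16:16/ptr16:32 operand as one unit: op_bytes of offset followed immediately by a 2-byte selector, which is exactly the layout the deleted comment was describing. A hedged sketch of pulling such an operand apart from a raw byte buffer (little-endian host assumed; the struct and function names here are invented for illustration):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* A decoded ptr16:16 or ptr16:32 far address. */
struct far_addr {
	uint32_t offset;	/* first op_bytes bytes */
	uint16_t selector;	/* 2 bytes immediately after */
};

/*
 * Decode a far immediate the way SrcImmFAddr consumes it: op_bytes of
 * offset, then a 16-bit selector, back to back in the byte stream.
 */
static struct far_addr decode_far(const uint8_t *p, int op_bytes)
{
	struct far_addr fa = { 0, 0 };

	memcpy(&fa.offset, p, op_bytes);	/* 2 or 4 bytes */
	memcpy(&fa.selector, p + op_bytes, 2);
	return fa;
}

int main(void)
{
	/* ljmp $0x0010,$0x12345678: offset bytes first, selector after */
	const uint8_t imm[] = { 0x78, 0x56, 0x34, 0x12, 0x10, 0x00 };
	struct far_addr fa = decode_far(imm, 4);

	printf("sel=%#06x off=%#010lx\n", fa.selector,
	       (unsigned long)fa.offset);
	return 0;
}

The same layout shows up again in the reworked 0xea (jmp far) handler later in this file, which copies the selector from c->src.valptr + c->op_bytes and the offset from the start of the buffer.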
*/  #define Src2Mask    (7<<29)  enum { @@ -124,15 +124,15 @@ static u32 opcode_table[256] = {  	/* 0x20 - 0x27 */  	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,  	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, -	DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0, +	ByteOp | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0,  	/* 0x28 - 0x2F */  	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,  	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, -	0, 0, 0, 0, +	ByteOp | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0,  	/* 0x30 - 0x37 */  	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,  	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, -	0, 0, 0, 0, +	ByteOp | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0,  	/* 0x38 - 0x3F */  	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,  	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, @@ -170,20 +170,20 @@ static u32 opcode_table[256] = {  	/* 0x88 - 0x8F */  	ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,  	ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, -	DstMem | SrcReg | ModRM | Mov, ModRM | DstReg, -	DstReg | SrcMem | ModRM | Mov, Group | Group1A, +	DstMem | SrcNone | ModRM | Mov, ModRM | DstReg, +	ImplicitOps | SrcMem16 | ModRM, Group | Group1A,  	/* 0x90 - 0x97 */  	DstReg, DstReg, DstReg, DstReg,	DstReg, DstReg, DstReg, DstReg,  	/* 0x98 - 0x9F */ -	0, 0, SrcImm | Src2Imm16 | No64, 0, +	0, 0, SrcImmFAddr | No64, 0,  	ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,  	/* 0xA0 - 0xA7 */ -	ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, -	ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs, +	ByteOp | DstAcc | SrcMem | Mov | MemAbs, DstAcc | SrcMem | Mov | MemAbs, +	ByteOp | DstMem | SrcAcc | Mov | MemAbs, DstMem | SrcAcc | Mov | MemAbs,  	ByteOp | SrcSI | DstDI | Mov | String, SrcSI | DstDI | Mov | String,  	ByteOp | SrcSI | DstDI | String, SrcSI | DstDI | String,  	/* 0xA8 - 0xAF */ -	0, 0, ByteOp | DstDI | Mov | String, DstDI | Mov | String, +	DstAcc | SrcImmByte | ByteOp, DstAcc | SrcImm, ByteOp | DstDI | Mov | String, DstDI | Mov | String,  	ByteOp | SrcSI | DstAcc | Mov | String, SrcSI | DstAcc | Mov | String,  	ByteOp | DstDI | String, DstDI | String,  	/* 0xB0 - 0xB7 */ @@ -215,7 +215,7 @@ static u32 opcode_table[256] = {  	ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc,  	/* 0xE8 - 0xEF */  	SrcImm | Stack, SrcImm | ImplicitOps, -	SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps, +	SrcImmFAddr | No64, SrcImmByte | ImplicitOps,  	SrcNone | ByteOp | DstAcc, SrcNone | DstAcc,  	SrcNone | ByteOp | DstAcc, SrcNone | DstAcc,  	/* 0xF0 - 0xF7 */ @@ -337,20 +337,20 @@ static u32 group_table[] = {  	[Group1A*8] =  	DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0,  	[Group3_Byte*8] = -	ByteOp | SrcImm | DstMem | ModRM, 0, +	ByteOp | SrcImm | DstMem | ModRM, ByteOp | SrcImm | DstMem | ModRM,  	ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,  	0, 0, 0, 0,  	[Group3*8] = -	DstMem | SrcImm | ModRM, 0, +	DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,  	DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,  	0, 0, 0, 0,  	[Group4*8] = -	ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM, +	ByteOp | DstMem | SrcNone | ModRM | Lock, ByteOp | DstMem | SrcNone | ModRM | Lock,  	0, 0, 0, 0, 0, 0,  	[Group5*8] = -	DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, +	DstMem | SrcNone | ModRM | Lock, 
DstMem | SrcNone | ModRM | Lock,  	SrcMem | ModRM | Stack, 0, -	SrcMem | ModRM | Stack, SrcMem | ModRM | Src2Mem16 | ImplicitOps, +	SrcMem | ModRM | Stack, SrcMemFAddr | ModRM | ImplicitOps,  	SrcMem | ModRM | Stack, 0,  	[Group7*8] =  	0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv, @@ -576,6 +576,13 @@ static u32 group2_table[] = {  	(_type)_x;							\  }) +#define insn_fetch_arr(_arr, _size, _eip)                                \ +({	rc = do_insn_fetch(ctxt, ops, (_eip), _arr, (_size));		\ +	if (rc != X86EMUL_CONTINUE)					\ +		goto done;						\ +	(_eip) += (_size);						\ +}) +  static inline unsigned long ad_mask(struct decode_cache *c)  {  	return (1UL << (c->ad_bytes << 3)) - 1; @@ -617,31 +624,66 @@ static void set_seg_override(struct decode_cache *c, int seg)  	c->seg_override = seg;  } -static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg) +static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, +			      struct x86_emulate_ops *ops, int seg)  {  	if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)  		return 0; -	return kvm_x86_ops->get_segment_base(ctxt->vcpu, seg); +	return ops->get_cached_segment_base(seg, ctxt->vcpu);  }  static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt, +				       struct x86_emulate_ops *ops,  				       struct decode_cache *c)  {  	if (!c->has_seg_override)  		return 0; -	return seg_base(ctxt, c->seg_override); +	return seg_base(ctxt, ops, c->seg_override); +} + +static unsigned long es_base(struct x86_emulate_ctxt *ctxt, +			     struct x86_emulate_ops *ops) +{ +	return seg_base(ctxt, ops, VCPU_SREG_ES); +} + +static unsigned long ss_base(struct x86_emulate_ctxt *ctxt, +			     struct x86_emulate_ops *ops) +{ +	return seg_base(ctxt, ops, VCPU_SREG_SS); +} + +static void emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, +				      u32 error, bool valid) +{ +	ctxt->exception = vec; +	ctxt->error_code = error; +	ctxt->error_code_valid = valid; +	ctxt->restart = false; +} + +static void emulate_gp(struct x86_emulate_ctxt *ctxt, int err) +{ +	emulate_exception(ctxt, GP_VECTOR, err, true);  } -static unsigned long es_base(struct x86_emulate_ctxt *ctxt) +static void emulate_pf(struct x86_emulate_ctxt *ctxt, unsigned long addr, +		       int err)  { -	return seg_base(ctxt, VCPU_SREG_ES); +	ctxt->cr2 = addr; +	emulate_exception(ctxt, PF_VECTOR, err, true);  } -static unsigned long ss_base(struct x86_emulate_ctxt *ctxt) +static void emulate_ud(struct x86_emulate_ctxt *ctxt)  { -	return seg_base(ctxt, VCPU_SREG_SS); +	emulate_exception(ctxt, UD_VECTOR, 0, false); +} + +static void emulate_ts(struct x86_emulate_ctxt *ctxt, int err) +{ +	emulate_exception(ctxt, TS_VECTOR, err, true);  }  static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, @@ -932,12 +974,9 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)  	/* we cannot decode insn before we complete previous rep insn */  	WARN_ON(ctxt->restart); -	/* Shadow copy of register state. Committed on successful emulation. 
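An aside on the emulate_exception()/emulate_gp()/emulate_pf()/emulate_ud()/emulate_ts() helpers introduced in this hunk: instead of injecting through kvm_queue_exception() and friends, the emulator now only records the pending vector, error code, and fault address in the context and leaves the actual injection to its caller. A stripped-down sketch of that record-then-inject split (the struct fields mimic the ones this diff adds, but the program is illustrative only):

#include <stdbool.h>
#include <stdio.h>

enum { UD_VECTOR = 6, GP_VECTOR = 13, PF_VECTOR = 14 };

/* Minimal stand-in for the fields x86_emulate_ctxt grows here. */
struct emu_ctxt {
	int exception;		/* pending vector, -1 if none */
	unsigned error_code;
	bool error_code_valid;
	unsigned long cr2;	/* fault address for #PF */
};

static void emulate_exception(struct emu_ctxt *ctxt, int vec,
			      unsigned error, bool valid)
{
	ctxt->exception = vec;
	ctxt->error_code = error;
	ctxt->error_code_valid = valid;
}

static void emulate_gp(struct emu_ctxt *ctxt, unsigned err)
{
	emulate_exception(ctxt, GP_VECTOR, err, true);
}

static void emulate_pf(struct emu_ctxt *ctxt, unsigned long addr,
		       unsigned err)
{
	ctxt->cr2 = addr;
	emulate_exception(ctxt, PF_VECTOR, err, true);
}

int main(void)
{
	struct emu_ctxt ctxt = { .exception = -1 };

	/* An emulated access faults: record it instead of injecting. */
	emulate_pf(&ctxt, 0xdeadb000, 0x2);

	/* The caller injects the recorded event after the fact. */
	if (ctxt.exception >= 0)
		printf("inject vector %d, err %#x, cr2 %#lx\n",
		       ctxt.exception, ctxt.error_code, ctxt.cr2);
	return 0;
}

The visible payoff throughout the rest of this diff is that emulate.c reaches guest state only through the ops table and the context, rather than through kvm_x86_ops directly.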
*/ -	memset(c, 0, sizeof(struct decode_cache));  	c->eip = ctxt->eip;  	c->fetch.start = c->fetch.end = c->eip; -	ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); -	memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); +	ctxt->cs_base = seg_base(ctxt, ops, VCPU_SREG_CS);  	switch (mode) {  	case X86EMUL_MODE_REAL: @@ -1060,7 +1099,7 @@ done_prefixes:  		set_seg_override(c, VCPU_SREG_DS);  	if (!(!c->twobyte && c->b == 0x8d)) -		c->modrm_ea += seg_override_base(ctxt, c); +		c->modrm_ea += seg_override_base(ctxt, ops, c);  	if (c->ad_bytes != 8)  		c->modrm_ea = (u32)c->modrm_ea; @@ -1148,6 +1187,25 @@ done_prefixes:  		else  			c->src.val = insn_fetch(u8, 1, c->eip);  		break; +	case SrcAcc: +		c->src.type = OP_REG; +		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; +		c->src.ptr = &c->regs[VCPU_REGS_RAX]; +		switch (c->src.bytes) { +			case 1: +				c->src.val = *(u8 *)c->src.ptr; +				break; +			case 2: +				c->src.val = *(u16 *)c->src.ptr; +				break; +			case 4: +				c->src.val = *(u32 *)c->src.ptr; +				break; +			case 8: +				c->src.val = *(u64 *)c->src.ptr; +				break; +		} +		break;  	case SrcOne:  		c->src.bytes = 1;  		c->src.val = 1; @@ -1156,10 +1214,21 @@ done_prefixes:  		c->src.type = OP_MEM;  		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;  		c->src.ptr = (unsigned long *) -			register_address(c,  seg_override_base(ctxt, c), +			register_address(c,  seg_override_base(ctxt, ops, c),  					 c->regs[VCPU_REGS_RSI]);  		c->src.val = 0;  		break; +	case SrcImmFAddr: +		c->src.type = OP_IMM; +		c->src.ptr = (unsigned long *)c->eip; +		c->src.bytes = c->op_bytes + 2; +		insn_fetch_arr(c->src.valptr, c->src.bytes, c->eip); +		break; +	case SrcMemFAddr: +		c->src.type = OP_MEM; +		c->src.ptr = (unsigned long *)c->modrm_ea; +		c->src.bytes = c->op_bytes + 2; +		break;  	}  	/* @@ -1179,22 +1248,10 @@ done_prefixes:  		c->src2.bytes = 1;  		c->src2.val = insn_fetch(u8, 1, c->eip);  		break; -	case Src2Imm16: -		c->src2.type = OP_IMM; -		c->src2.ptr = (unsigned long *)c->eip; -		c->src2.bytes = 2; -		c->src2.val = insn_fetch(u16, 2, c->eip); -		break;  	case Src2One:  		c->src2.bytes = 1;  		c->src2.val = 1;  		break; -	case Src2Mem16: -		c->src2.type = OP_MEM; -		c->src2.bytes = 2; -		c->src2.ptr = (unsigned long *)(c->modrm_ea + c->src.bytes); -		c->src2.val = 0; -		break;  	}  	/* Decode and fetch the destination operand: register or memory. */ @@ -1253,7 +1310,7 @@ done_prefixes:  		c->dst.type = OP_MEM;  		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;  		c->dst.ptr = (unsigned long *) -			register_address(c, es_base(ctxt), +			register_address(c, es_base(ctxt, ops),  					 c->regs[VCPU_REGS_RDI]);  		c->dst.val = 0;  		break; @@ -1263,6 +1320,37 @@ done:  	return (rc == X86EMUL_UNHANDLEABLE) ? 
-1 : 0;  } +static int read_emulated(struct x86_emulate_ctxt *ctxt, +			 struct x86_emulate_ops *ops, +			 unsigned long addr, void *dest, unsigned size) +{ +	int rc; +	struct read_cache *mc = &ctxt->decode.mem_read; +	u32 err; + +	while (size) { +		int n = min(size, 8u); +		size -= n; +		if (mc->pos < mc->end) +			goto read_cached; + +		rc = ops->read_emulated(addr, mc->data + mc->end, n, &err, +					ctxt->vcpu); +		if (rc == X86EMUL_PROPAGATE_FAULT) +			emulate_pf(ctxt, addr, err); +		if (rc != X86EMUL_CONTINUE) +			return rc; +		mc->end += n; + +	read_cached: +		memcpy(dest, mc->data + mc->pos, n); +		mc->pos += n; +		dest += n; +		addr += n; +	} +	return X86EMUL_CONTINUE; +} +  static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,  			   struct x86_emulate_ops *ops,  			   unsigned int size, unsigned short port, @@ -1330,13 +1418,13 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,  	get_descriptor_table_ptr(ctxt, ops, selector, &dt);  	if (dt.size < index * 8 + 7) { -		kvm_inject_gp(ctxt->vcpu, selector & 0xfffc); +		emulate_gp(ctxt, selector & 0xfffc);  		return X86EMUL_PROPAGATE_FAULT;  	}  	addr = dt.address + index * 8;  	ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu,  &err);  	if (ret == X86EMUL_PROPAGATE_FAULT) -		kvm_inject_page_fault(ctxt->vcpu, addr, err); +		emulate_pf(ctxt, addr, err);         return ret;  } @@ -1355,14 +1443,14 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,  	get_descriptor_table_ptr(ctxt, ops, selector, &dt);  	if (dt.size < index * 8 + 7) { -		kvm_inject_gp(ctxt->vcpu, selector & 0xfffc); +		emulate_gp(ctxt, selector & 0xfffc);  		return X86EMUL_PROPAGATE_FAULT;  	}  	addr = dt.address + index * 8;  	ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err);  	if (ret == X86EMUL_PROPAGATE_FAULT) -		kvm_inject_page_fault(ctxt->vcpu, addr, err); +		emulate_pf(ctxt, addr, err);  	return ret;  } @@ -1481,11 +1569,70 @@ load:  	ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu);  	return X86EMUL_CONTINUE;  exception: -	kvm_queue_exception_e(ctxt->vcpu, err_vec, err_code); +	emulate_exception(ctxt, err_vec, err_code, true);  	return X86EMUL_PROPAGATE_FAULT;  } -static inline void emulate_push(struct x86_emulate_ctxt *ctxt) +static inline int writeback(struct x86_emulate_ctxt *ctxt, +			    struct x86_emulate_ops *ops) +{ +	int rc; +	struct decode_cache *c = &ctxt->decode; +	u32 err; + +	switch (c->dst.type) { +	case OP_REG: +		/* The 4-byte case *is* correct: +		 * in 64-bit mode we zero-extend. 
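Before writeback() continues below, the new read_emulated() helper above deserves a note: it splits every operand read into chunks of at most 8 bytes and funnels them through the per-instruction decode.mem_read cache, so a string instruction that has to exit and restart replays the bytes it already fetched instead of re-reading guest memory. A simplified, self-contained sketch of that read-through cache, with an array standing in for guest memory and for the ops->read_emulated() callback:

#include <stdio.h>
#include <string.h>

#define CACHE_SIZE 1024

/* Simplified version of the emulator's struct read_cache. */
struct read_cache {
	unsigned char data[CACHE_SIZE];
	unsigned long pos;	/* read cursor, reset per re-execution */
	unsigned long end;	/* bytes actually fetched so far */
};

static unsigned char guest_mem[256];	/* stand-in for guest memory */
static unsigned fetches;		/* count real fetches for the demo */

static void read_emulated(struct read_cache *mc, unsigned long addr,
			  void *dest, unsigned size)
{
	while (size) {
		unsigned n = size < 8u ? size : 8u;

		if (mc->pos >= mc->end) {
			/* Cache miss: do the (expensive) real access. */
			memcpy(mc->data + mc->end, guest_mem + addr, n);
			mc->end += n;
			fetches++;
		}
		/* Cache hit or freshly filled: serve from the cache. */
		memcpy(dest, mc->data + mc->pos, n);
		mc->pos += n;
		dest = (char *)dest + n;
		addr += n;
		size -= n;
	}
}

int main(void)
{
	struct read_cache mc = { .pos = 0, .end = 0 };
	unsigned char buf[16];

	memcpy(guest_mem, "0123456789abcdef", 16);

	read_emulated(&mc, 0, buf, 16);	/* two real 8-byte fetches */
	mc.pos = 0;			/* instruction restarts... */
	read_emulated(&mc, 0, buf, 16);	/* ...replayed from the cache */

	printf("real fetches: %u\n", fetches);	/* prints 2, not 4 */
	return 0;
}

In the real code, mem_read.pos is reset at the top of x86_emulate_insn() and mem_read.end once the instruction completes; both resets appear later in this diff.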
+		 */ +		switch (c->dst.bytes) { +		case 1: +			*(u8 *)c->dst.ptr = (u8)c->dst.val; +			break; +		case 2: +			*(u16 *)c->dst.ptr = (u16)c->dst.val; +			break; +		case 4: +			*c->dst.ptr = (u32)c->dst.val; +			break;	/* 64b: zero-ext */ +		case 8: +			*c->dst.ptr = c->dst.val; +			break; +		} +		break; +	case OP_MEM: +		if (c->lock_prefix) +			rc = ops->cmpxchg_emulated( +					(unsigned long)c->dst.ptr, +					&c->dst.orig_val, +					&c->dst.val, +					c->dst.bytes, +					&err, +					ctxt->vcpu); +		else +			rc = ops->write_emulated( +					(unsigned long)c->dst.ptr, +					&c->dst.val, +					c->dst.bytes, +					&err, +					ctxt->vcpu); +		if (rc == X86EMUL_PROPAGATE_FAULT) +			emulate_pf(ctxt, +					      (unsigned long)c->dst.ptr, err); +		if (rc != X86EMUL_CONTINUE) +			return rc; +		break; +	case OP_NONE: +		/* no writeback */ +		break; +	default: +		break; +	} +	return X86EMUL_CONTINUE; +} + +static inline void emulate_push(struct x86_emulate_ctxt *ctxt, +				struct x86_emulate_ops *ops)  {  	struct decode_cache *c = &ctxt->decode; @@ -1493,7 +1640,7 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt)  	c->dst.bytes = c->op_bytes;  	c->dst.val = c->src.val;  	register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); -	c->dst.ptr = (void *) register_address(c, ss_base(ctxt), +	c->dst.ptr = (void *) register_address(c, ss_base(ctxt, ops),  					       c->regs[VCPU_REGS_RSP]);  } @@ -1504,9 +1651,9 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt,  	struct decode_cache *c = &ctxt->decode;  	int rc; -	rc = ops->read_emulated(register_address(c, ss_base(ctxt), -						 c->regs[VCPU_REGS_RSP]), -				dest, len, ctxt->vcpu); +	rc = read_emulated(ctxt, ops, register_address(c, ss_base(ctxt, ops), +						       c->regs[VCPU_REGS_RSP]), +			   dest, len);  	if (rc != X86EMUL_CONTINUE)  		return rc; @@ -1541,7 +1688,7 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt,  		break;  	case X86EMUL_MODE_VM86:  		if (iopl < 3) { -			kvm_inject_gp(ctxt->vcpu, 0); +			emulate_gp(ctxt, 0);  			return X86EMUL_PROPAGATE_FAULT;  		}  		change_mask |= EFLG_IF; @@ -1557,15 +1704,14 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt,  	return rc;  } -static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg) +static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, +			      struct x86_emulate_ops *ops, int seg)  {  	struct decode_cache *c = &ctxt->decode; -	struct kvm_segment segment; -	kvm_x86_ops->get_segment(ctxt->vcpu, &segment, seg); +	c->src.val = ops->get_segment_selector(seg, ctxt->vcpu); -	c->src.val = segment.selector; -	emulate_push(ctxt); +	emulate_push(ctxt, ops);  }  static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, @@ -1583,19 +1729,31 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,  	return rc;  } -static void emulate_pusha(struct x86_emulate_ctxt *ctxt) +static int emulate_pusha(struct x86_emulate_ctxt *ctxt, +			  struct x86_emulate_ops *ops)  {  	struct decode_cache *c = &ctxt->decode;  	unsigned long old_esp = c->regs[VCPU_REGS_RSP]; +	int rc = X86EMUL_CONTINUE;  	int reg = VCPU_REGS_RAX;  	while (reg <= VCPU_REGS_RDI) {  		(reg == VCPU_REGS_RSP) ?  		(c->src.val = old_esp) : (c->src.val = c->regs[reg]); -		emulate_push(ctxt); +		emulate_push(ctxt, ops); + +		rc = writeback(ctxt, ops); +		if (rc != X86EMUL_CONTINUE) +			return rc; +  		++reg;  	} + +	/* Disable writeback. 
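Note how the OP_MEM arm of writeback() above handles LOCK-prefixed instructions: it commits through ops->cmpxchg_emulated() with both dst.orig_val (captured when the operand was read) and the computed dst.val, so the store only lands if guest memory still holds what the emulator saw. The same read/compute/compare-exchange shape in miniature, using C11 atomics (a sketch of the pattern, not of the KVM callback, which among other things also has to cope with guest pages it cannot map atomically):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t guest_word;	/* stand-in for guest memory */

/*
 * Emulate "lock add $src, (mem)": read the old value, compute, then
 * commit only if memory is unchanged -- retrying the read-modify-write
 * if another writer raced with us.
 */
static uint64_t emulate_locked_add(_Atomic uint64_t *mem, uint64_t src)
{
	uint64_t orig_val, new_val;

	do {
		orig_val = atomic_load(mem);	/* the decode-time read */
		new_val = orig_val + src;	/* the emulated ALU op */
		/* cmpxchg: commit iff *mem still equals orig_val */
	} while (!atomic_compare_exchange_weak(mem, &orig_val, new_val));

	return new_val;
}

int main(void)
{
	atomic_store(&guest_word, 41);
	printf("result: %lu\n",
	       (unsigned long)emulate_locked_add(&guest_word, 1));
	return 0;
}

The retry loop is a userspace convention for the demo; the emulator itself issues the operation once per emulated instruction.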
*/ +	c->dst.type = OP_NONE; + +	return rc;  }  static int emulate_popa(struct x86_emulate_ctxt *ctxt, @@ -1695,14 +1853,14 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,  		old_eip = c->eip;  		c->eip = c->src.val;  		c->src.val = old_eip; -		emulate_push(ctxt); +		emulate_push(ctxt, ops);  		break;  	}  	case 4: /* jmp abs */  		c->eip = c->src.val;  		break;  	case 6:	/* push */ -		emulate_push(ctxt); +		emulate_push(ctxt, ops);  		break;  	}  	return X86EMUL_CONTINUE; @@ -1748,145 +1906,82 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,  	return rc;  } -static inline int writeback(struct x86_emulate_ctxt *ctxt, -			    struct x86_emulate_ops *ops) -{ -	int rc; -	struct decode_cache *c = &ctxt->decode; - -	switch (c->dst.type) { -	case OP_REG: -		/* The 4-byte case *is* correct: -		 * in 64-bit mode we zero-extend. -		 */ -		switch (c->dst.bytes) { -		case 1: -			*(u8 *)c->dst.ptr = (u8)c->dst.val; -			break; -		case 2: -			*(u16 *)c->dst.ptr = (u16)c->dst.val; -			break; -		case 4: -			*c->dst.ptr = (u32)c->dst.val; -			break;	/* 64b: zero-ext */ -		case 8: -			*c->dst.ptr = c->dst.val; -			break; -		} -		break; -	case OP_MEM: -		if (c->lock_prefix) -			rc = ops->cmpxchg_emulated( -					(unsigned long)c->dst.ptr, -					&c->dst.orig_val, -					&c->dst.val, -					c->dst.bytes, -					ctxt->vcpu); -		else -			rc = ops->write_emulated( -					(unsigned long)c->dst.ptr, -					&c->dst.val, -					c->dst.bytes, -					ctxt->vcpu); -		if (rc != X86EMUL_CONTINUE) -			return rc; -		break; -	case OP_NONE: -		/* no writeback */ -		break; -	default: -		break; -	} -	return X86EMUL_CONTINUE; -} - -static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) -{ -	u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask); -	/* -	 * an sti; sti; sequence only disable interrupts for the first -	 * instruction. So, if the last instruction, be it emulated or -	 * not, left the system with the INT_STI flag enabled, it -	 * means that the last instruction is an sti. We should not -	 * leave the flag on in this case. 
The same goes for mov ss -	 */ -	if (!(int_shadow & mask)) -		ctxt->interruptibility = mask; -} -  static inline void  setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, -	struct kvm_segment *cs, struct kvm_segment *ss) +			struct x86_emulate_ops *ops, struct desc_struct *cs, +			struct desc_struct *ss)  { -	memset(cs, 0, sizeof(struct kvm_segment)); -	kvm_x86_ops->get_segment(ctxt->vcpu, cs, VCPU_SREG_CS); -	memset(ss, 0, sizeof(struct kvm_segment)); +	memset(cs, 0, sizeof(struct desc_struct)); +	ops->get_cached_descriptor(cs, VCPU_SREG_CS, ctxt->vcpu); +	memset(ss, 0, sizeof(struct desc_struct));  	cs->l = 0;		/* will be adjusted later */ -	cs->base = 0;		/* flat segment */ +	set_desc_base(cs, 0);	/* flat segment */  	cs->g = 1;		/* 4kb granularity */ -	cs->limit = 0xffffffff;	/* 4GB limit */ +	set_desc_limit(cs, 0xfffff);	/* 4GB limit */  	cs->type = 0x0b;	/* Read, Execute, Accessed */  	cs->s = 1;  	cs->dpl = 0;		/* will be adjusted later */ -	cs->present = 1; -	cs->db = 1; +	cs->p = 1; +	cs->d = 1; -	ss->unusable = 0; -	ss->base = 0;		/* flat segment */ -	ss->limit = 0xffffffff;	/* 4GB limit */ +	set_desc_base(ss, 0);	/* flat segment */ +	set_desc_limit(ss, 0xfffff);	/* 4GB limit */  	ss->g = 1;		/* 4kb granularity */  	ss->s = 1;  	ss->type = 0x03;	/* Read/Write, Accessed */ -	ss->db = 1;		/* 32bit stack segment */ +	ss->d = 1;		/* 32bit stack segment */  	ss->dpl = 0; -	ss->present = 1; +	ss->p = 1;  }  static int -emulate_syscall(struct x86_emulate_ctxt *ctxt) +emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)  {  	struct decode_cache *c = &ctxt->decode; -	struct kvm_segment cs, ss; +	struct desc_struct cs, ss;  	u64 msr_data; +	u16 cs_sel, ss_sel;  	/* syscall is not available in real mode */  	if (ctxt->mode == X86EMUL_MODE_REAL ||  	    ctxt->mode == X86EMUL_MODE_VM86) { -		kvm_queue_exception(ctxt->vcpu, UD_VECTOR); +		emulate_ud(ctxt);  		return X86EMUL_PROPAGATE_FAULT;  	} -	setup_syscalls_segments(ctxt, &cs, &ss); +	setup_syscalls_segments(ctxt, ops, &cs, &ss); -	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); +	ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);  	msr_data >>= 32; -	cs.selector = (u16)(msr_data & 0xfffc); -	ss.selector = (u16)(msr_data + 8); +	cs_sel = (u16)(msr_data & 0xfffc); +	ss_sel = (u16)(msr_data + 8);  	if (is_long_mode(ctxt->vcpu)) { -		cs.db = 0; +		cs.d = 0;  		cs.l = 1;  	} -	kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS); -	kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS); +	ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu); +	ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); +	ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu); +	ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu);  	c->regs[VCPU_REGS_RCX] = c->eip;  	if (is_long_mode(ctxt->vcpu)) {  #ifdef CONFIG_X86_64  		c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF; -		kvm_x86_ops->get_msr(ctxt->vcpu, -			ctxt->mode == X86EMUL_MODE_PROT64 ? -			MSR_LSTAR : MSR_CSTAR, &msr_data); +		ops->get_msr(ctxt->vcpu, +			     ctxt->mode == X86EMUL_MODE_PROT64 ? 
+			     MSR_LSTAR : MSR_CSTAR, &msr_data);  		c->eip = msr_data; -		kvm_x86_ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data); +		ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data);  		ctxt->eflags &= ~(msr_data | EFLG_RF);  #endif  	} else {  		/* legacy mode */ -		kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); +		ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);  		c->eip = (u32)msr_data;  		ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); @@ -1896,15 +1991,16 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt)  }  static int -emulate_sysenter(struct x86_emulate_ctxt *ctxt) +emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)  {  	struct decode_cache *c = &ctxt->decode; -	struct kvm_segment cs, ss; +	struct desc_struct cs, ss;  	u64 msr_data; +	u16 cs_sel, ss_sel;  	/* inject #GP if in real mode */  	if (ctxt->mode == X86EMUL_MODE_REAL) { -		kvm_inject_gp(ctxt->vcpu, 0); +		emulate_gp(ctxt, 0);  		return X86EMUL_PROPAGATE_FAULT;  	} @@ -1912,67 +2008,70 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt)  	* Therefore, we inject an #UD.  	*/  	if (ctxt->mode == X86EMUL_MODE_PROT64) { -		kvm_queue_exception(ctxt->vcpu, UD_VECTOR); +		emulate_ud(ctxt);  		return X86EMUL_PROPAGATE_FAULT;  	} -	setup_syscalls_segments(ctxt, &cs, &ss); +	setup_syscalls_segments(ctxt, ops, &cs, &ss); -	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); +	ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);  	switch (ctxt->mode) {  	case X86EMUL_MODE_PROT32:  		if ((msr_data & 0xfffc) == 0x0) { -			kvm_inject_gp(ctxt->vcpu, 0); +			emulate_gp(ctxt, 0);  			return X86EMUL_PROPAGATE_FAULT;  		}  		break;  	case X86EMUL_MODE_PROT64:  		if (msr_data == 0x0) { -			kvm_inject_gp(ctxt->vcpu, 0); +			emulate_gp(ctxt, 0);  			return X86EMUL_PROPAGATE_FAULT;  		}  		break;  	}  	ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); -	cs.selector = (u16)msr_data; -	cs.selector &= ~SELECTOR_RPL_MASK; -	ss.selector = cs.selector + 8; -	ss.selector &= ~SELECTOR_RPL_MASK; +	cs_sel = (u16)msr_data; +	cs_sel &= ~SELECTOR_RPL_MASK; +	ss_sel = cs_sel + 8; +	ss_sel &= ~SELECTOR_RPL_MASK;  	if (ctxt->mode == X86EMUL_MODE_PROT64  		|| is_long_mode(ctxt->vcpu)) { -		cs.db = 0; +		cs.d = 0;  		cs.l = 1;  	} -	kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS); -	kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS); +	ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu); +	ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); +	ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu); +	ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); -	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data); +	ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data);  	c->eip = msr_data; -	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data); +	ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data);  	c->regs[VCPU_REGS_RSP] = msr_data;  	return X86EMUL_CONTINUE;  }  static int -emulate_sysexit(struct x86_emulate_ctxt *ctxt) +emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)  {  	struct decode_cache *c = &ctxt->decode; -	struct kvm_segment cs, ss; +	struct desc_struct cs, ss;  	u64 msr_data;  	int usermode; +	u16 cs_sel, ss_sel;  	/* inject #GP if in real mode or Virtual 8086 mode */  	if (ctxt->mode == X86EMUL_MODE_REAL ||  	    ctxt->mode == X86EMUL_MODE_VM86) { -		kvm_inject_gp(ctxt->vcpu, 0); +		emulate_gp(ctxt, 0);  		return X86EMUL_PROPAGATE_FAULT;  	} -	setup_syscalls_segments(ctxt, &cs, &ss); +	
setup_syscalls_segments(ctxt, ops, &cs, &ss);  	if ((c->rex_prefix & 0x8) != 0x0)  		usermode = X86EMUL_MODE_PROT64; @@ -1981,35 +2080,37 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt)  	cs.dpl = 3;  	ss.dpl = 3; -	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); +	ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);  	switch (usermode) {  	case X86EMUL_MODE_PROT32: -		cs.selector = (u16)(msr_data + 16); +		cs_sel = (u16)(msr_data + 16);  		if ((msr_data & 0xfffc) == 0x0) { -			kvm_inject_gp(ctxt->vcpu, 0); +			emulate_gp(ctxt, 0);  			return X86EMUL_PROPAGATE_FAULT;  		} -		ss.selector = (u16)(msr_data + 24); +		ss_sel = (u16)(msr_data + 24);  		break;  	case X86EMUL_MODE_PROT64: -		cs.selector = (u16)(msr_data + 32); +		cs_sel = (u16)(msr_data + 32);  		if (msr_data == 0x0) { -			kvm_inject_gp(ctxt->vcpu, 0); +			emulate_gp(ctxt, 0);  			return X86EMUL_PROPAGATE_FAULT;  		} -		ss.selector = cs.selector + 8; -		cs.db = 0; +		ss_sel = cs_sel + 8; +		cs.d = 0;  		cs.l = 1;  		break;  	} -	cs.selector |= SELECTOR_RPL_MASK; -	ss.selector |= SELECTOR_RPL_MASK; +	cs_sel |= SELECTOR_RPL_MASK; +	ss_sel |= SELECTOR_RPL_MASK; -	kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS); -	kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS); +	ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu); +	ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); +	ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu); +	ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); -	c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX]; -	c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX]; +	c->eip = c->regs[VCPU_REGS_RDX]; +	c->regs[VCPU_REGS_RSP] = c->regs[VCPU_REGS_RCX];  	return X86EMUL_CONTINUE;  } @@ -2030,25 +2131,25 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,  					    struct x86_emulate_ops *ops,  					    u16 port, u16 len)  { -	struct kvm_segment tr_seg; +	struct desc_struct tr_seg;  	int r;  	u16 io_bitmap_ptr;  	u8 perm, bit_idx = port & 0x7;  	unsigned mask = (1 << len) - 1; -	kvm_get_segment(ctxt->vcpu, &tr_seg, VCPU_SREG_TR); -	if (tr_seg.unusable) +	ops->get_cached_descriptor(&tr_seg, VCPU_SREG_TR, ctxt->vcpu); +	if (!tr_seg.p)  		return false; -	if (tr_seg.limit < 103) +	if (desc_limit_scaled(&tr_seg) < 103)  		return false; -	r = ops->read_std(tr_seg.base + 102, &io_bitmap_ptr, 2, ctxt->vcpu, -			  NULL); +	r = ops->read_std(get_desc_base(&tr_seg) + 102, &io_bitmap_ptr, 2, +			  ctxt->vcpu, NULL);  	if (r != X86EMUL_CONTINUE)  		return false; -	if (io_bitmap_ptr + port/8 > tr_seg.limit) +	if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))  		return false; -	r = ops->read_std(tr_seg.base + io_bitmap_ptr + port/8, &perm, 1, -			  ctxt->vcpu, NULL); +	r = ops->read_std(get_desc_base(&tr_seg) + io_bitmap_ptr + port/8, +			  &perm, 1, ctxt->vcpu, NULL);  	if (r != X86EMUL_CONTINUE)  		return false;  	if ((perm >> bit_idx) & mask) @@ -2066,17 +2167,6 @@ static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,  	return true;  } -static u32 get_cached_descriptor_base(struct x86_emulate_ctxt *ctxt, -				      struct x86_emulate_ops *ops, -				      int seg) -{ -	struct desc_struct desc; -	if (ops->get_cached_descriptor(&desc, seg, ctxt->vcpu)) -		return get_desc_base(&desc); -	else -		return ~0; -} -  static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,  				struct x86_emulate_ops *ops,  				struct tss_segment_16 *tss) @@ -2165,7 +2255,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,  		
	    &err);  	if (ret == X86EMUL_PROPAGATE_FAULT) {  		/* FIXME: need to provide precise fault address */ -		kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); +		emulate_pf(ctxt, old_tss_base, err);  		return ret;  	} @@ -2175,7 +2265,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,  			     &err);  	if (ret == X86EMUL_PROPAGATE_FAULT) {  		/* FIXME: need to provide precise fault address */ -		kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); +		emulate_pf(ctxt, old_tss_base, err);  		return ret;  	} @@ -2183,7 +2273,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,  			    &err);  	if (ret == X86EMUL_PROPAGATE_FAULT) {  		/* FIXME: need to provide precise fault address */ -		kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); +		emulate_pf(ctxt, new_tss_base, err);  		return ret;  	} @@ -2196,7 +2286,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,  				     ctxt->vcpu, &err);  		if (ret == X86EMUL_PROPAGATE_FAULT) {  			/* FIXME: need to provide precise fault address */ -			kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); +			emulate_pf(ctxt, new_tss_base, err);  			return ret;  		}  	} @@ -2238,7 +2328,10 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,  	struct decode_cache *c = &ctxt->decode;  	int ret; -	ops->set_cr(3, tss->cr3, ctxt->vcpu); +	if (ops->set_cr(3, tss->cr3, ctxt->vcpu)) { +		emulate_gp(ctxt, 0); +		return X86EMUL_PROPAGATE_FAULT; +	}  	c->eip = tss->eip;  	ctxt->eflags = tss->eflags | 2;  	c->regs[VCPU_REGS_RAX] = tss->eax; @@ -2304,7 +2397,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,  			    &err);  	if (ret == X86EMUL_PROPAGATE_FAULT) {  		/* FIXME: need to provide precise fault address */ -		kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); +		emulate_pf(ctxt, old_tss_base, err);  		return ret;  	} @@ -2314,7 +2407,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,  			     &err);  	if (ret == X86EMUL_PROPAGATE_FAULT) {  		/* FIXME: need to provide precise fault address */ -		kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); +		emulate_pf(ctxt, old_tss_base, err);  		return ret;  	} @@ -2322,7 +2415,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,  			    &err);  	if (ret == X86EMUL_PROPAGATE_FAULT) {  		/* FIXME: need to provide precise fault address */ -		kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); +		emulate_pf(ctxt, new_tss_base, err);  		return ret;  	} @@ -2335,7 +2428,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,  				     ctxt->vcpu, &err);  		if (ret == X86EMUL_PROPAGATE_FAULT) {  			/* FIXME: need to provide precise fault address */ -			kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); +			emulate_pf(ctxt, new_tss_base, err);  			return ret;  		}  	} @@ -2352,7 +2445,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,  	int ret;  	u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu);  	ulong old_tss_base = -		get_cached_descriptor_base(ctxt, ops, VCPU_SREG_TR); +		ops->get_cached_segment_base(VCPU_SREG_TR, ctxt->vcpu);  	u32 desc_limit;  	/* FIXME: old_tss_base == ~0 ? 
*/ @@ -2369,7 +2462,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,  	if (reason != TASK_SWITCH_IRET) {  		if ((tss_selector & 3) > next_tss_desc.dpl ||  		    ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) { -			kvm_inject_gp(ctxt->vcpu, 0); +			emulate_gp(ctxt, 0);  			return X86EMUL_PROPAGATE_FAULT;  		}  	} @@ -2378,8 +2471,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,  	if (!next_tss_desc.p ||  	    ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||  	     desc_limit < 0x2b)) { -		kvm_queue_exception_e(ctxt->vcpu, TS_VECTOR, -				      tss_selector & 0xfffc); +		emulate_ts(ctxt, tss_selector & 0xfffc);  		return X86EMUL_PROPAGATE_FAULT;  	} @@ -2425,7 +2517,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,  		c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;  		c->lock_prefix = 0;  		c->src.val = (unsigned long) error_code; -		emulate_push(ctxt); +		emulate_push(ctxt, ops);  	}  	return ret; @@ -2439,18 +2531,16 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,  	struct decode_cache *c = &ctxt->decode;  	int rc; -	memset(c, 0, sizeof(struct decode_cache));  	c->eip = ctxt->eip; -	memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);  	c->dst.type = OP_NONE;  	rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason,  				     has_error_code, error_code);  	if (rc == X86EMUL_CONTINUE) { -		memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); -		kvm_rip_write(ctxt->vcpu, c->eip);  		rc = writeback(ctxt, ops); +		if (rc == X86EMUL_CONTINUE) +			ctxt->eip = c->eip;  	}  	return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; @@ -2474,29 +2564,22 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)  	int rc = X86EMUL_CONTINUE;  	int saved_dst_type = c->dst.type; -	ctxt->interruptibility = 0; - -	/* Shadow copy of register state. Committed on successful emulation. -	 * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't -	 * modify them. 
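An aside on the #TS check in emulator_do_task_switch() above: a task switch is refused unless the target TSS descriptor is present and its scaled limit is big enough for the TSS flavour, at least 0x67 bytes when descriptor type bit 3 marks a 32-bit TSS, and at least 0x2b for a 16-bit one. The predicate, pulled out into a runnable sketch (the struct is a simplified stand-in for desc_struct, and the limit is assumed already byte-scaled the way desc_limit_scaled() produces it):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Just the fields the check needs from a TSS descriptor. */
struct tss_desc {
	uint32_t limit;		/* byte-granular limit, already scaled */
	unsigned type:4;	/* bit 3 set => 32-bit TSS */
	unsigned p:1;		/* present */
};

static bool tss_desc_ok(const struct tss_desc *d)
{
	if (!d->p)
		return false;			/* not present: #TS */
	if (d->type & 8)
		return d->limit >= 0x67;	/* 32-bit TSS minimum */
	return d->limit >= 0x2b;		/* 16-bit TSS minimum */
}

int main(void)
{
	struct tss_desc ok32 = { .limit = 0x67, .type = 9, .p = 1 };
	struct tss_desc small = { .limit = 0x2a, .type = 1, .p = 1 };

	printf("%d %d\n", tss_desc_ok(&ok32), tss_desc_ok(&small));
	return 0;
}

Failing this check raises #TS with the offending selector (RPL bits masked off) as the error code, matching the emulate_ts() call in the hunk.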
-	 */ - -	memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); +	ctxt->decode.mem_read.pos = 0;  	if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { -		kvm_queue_exception(ctxt->vcpu, UD_VECTOR); +		emulate_ud(ctxt);  		goto done;  	}  	/* LOCK prefix is allowed only with some instructions */  	if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) { -		kvm_queue_exception(ctxt->vcpu, UD_VECTOR); +		emulate_ud(ctxt);  		goto done;  	}  	/* Privileged instruction can be executed only in CPL=0 */  	if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { -		kvm_inject_gp(ctxt->vcpu, 0); +		emulate_gp(ctxt, 0);  		goto done;  	} @@ -2506,7 +2589,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)  		if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) {  		string_done:  			ctxt->restart = false; -			kvm_rip_write(ctxt->vcpu, c->eip); +			ctxt->eip = c->eip;  			goto done;  		}  		/* The second termination condition only applies for REPE @@ -2529,20 +2612,16 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)  	}  	if (c->src.type == OP_MEM) { -		rc = ops->read_emulated((unsigned long)c->src.ptr, -					&c->src.val, -					c->src.bytes, -					ctxt->vcpu); +		rc = read_emulated(ctxt, ops, (unsigned long)c->src.ptr, +					c->src.valptr, c->src.bytes);  		if (rc != X86EMUL_CONTINUE)  			goto done;  		c->src.orig_val = c->src.val;  	}  	if (c->src2.type == OP_MEM) { -		rc = ops->read_emulated((unsigned long)c->src2.ptr, -					&c->src2.val, -					c->src2.bytes, -					ctxt->vcpu); +		rc = read_emulated(ctxt, ops, (unsigned long)c->src2.ptr, +					&c->src2.val, c->src2.bytes);  		if (rc != X86EMUL_CONTINUE)  			goto done;  	} @@ -2553,8 +2632,8 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)  	if ((c->dst.type == OP_MEM) && !(c->d & Mov)) {  		/* optimisation - avoid slow emulated read if Mov */ -		rc = ops->read_emulated((unsigned long)c->dst.ptr, &c->dst.val, -					c->dst.bytes, ctxt->vcpu); +		rc = read_emulated(ctxt, ops, (unsigned long)c->dst.ptr, +				   &c->dst.val, c->dst.bytes);  		if (rc != X86EMUL_CONTINUE)  			goto done;  	} @@ -2571,7 +2650,7 @@ special_insn:  		emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);  		break;  	case 0x06:		/* push es */ -		emulate_push_sreg(ctxt, VCPU_SREG_ES); +		emulate_push_sreg(ctxt, ops, VCPU_SREG_ES);  		break;  	case 0x07:		/* pop es */  		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); @@ -2583,14 +2662,14 @@ special_insn:  		emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);  		break;  	case 0x0e:		/* push cs */ -		emulate_push_sreg(ctxt, VCPU_SREG_CS); +		emulate_push_sreg(ctxt, ops, VCPU_SREG_CS);  		break;  	case 0x10 ... 0x15:  	      adc:		/* adc */  		emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags);  		break;  	case 0x16:		/* push ss */ -		emulate_push_sreg(ctxt, VCPU_SREG_SS); +		emulate_push_sreg(ctxt, ops, VCPU_SREG_SS);  		break;  	case 0x17:		/* pop ss */  		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); @@ -2602,7 +2681,7 @@ special_insn:  		emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);  		break;  	case 0x1e:		/* push ds */ -		emulate_push_sreg(ctxt, VCPU_SREG_DS); +		emulate_push_sreg(ctxt, ops, VCPU_SREG_DS);  		break;  	case 0x1f:		/* pop ds */  		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); @@ -2632,7 +2711,7 @@ special_insn:  		emulate_1op("dec", c->dst, ctxt->eflags);  		break;  	case 0x50 ... 0x57:  /* push reg */ -		emulate_push(ctxt); +		emulate_push(ctxt, ops);  		break;  	case 0x58 ... 
0x5f: /* pop reg */  	pop_instruction: @@ -2641,7 +2720,9 @@ special_insn:  			goto done;  		break;  	case 0x60:	/* pusha */ -		emulate_pusha(ctxt); +		rc = emulate_pusha(ctxt, ops); +		if (rc != X86EMUL_CONTINUE) +			goto done;  		break;  	case 0x61:	/* popa */  		rc = emulate_popa(ctxt, ops); @@ -2655,14 +2736,14 @@ special_insn:  		break;  	case 0x68: /* push imm */  	case 0x6a: /* push imm8 */ -		emulate_push(ctxt); +		emulate_push(ctxt, ops);  		break;  	case 0x6c:		/* insb */  	case 0x6d:		/* insw/insd */  		c->dst.bytes = min(c->dst.bytes, 4u);  		if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],  					  c->dst.bytes)) { -			kvm_inject_gp(ctxt->vcpu, 0); +			emulate_gp(ctxt, 0);  			goto done;  		}  		if (!pio_in_emulated(ctxt, ops, c->dst.bytes, @@ -2674,7 +2755,7 @@ special_insn:  		c->src.bytes = min(c->src.bytes, 4u);  		if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],  					  c->src.bytes)) { -			kvm_inject_gp(ctxt->vcpu, 0); +			emulate_gp(ctxt, 0);  			goto done;  		}  		ops->pio_out_emulated(c->src.bytes, c->regs[VCPU_REGS_RDX], @@ -2707,6 +2788,7 @@ special_insn:  		}  		break;  	case 0x84 ... 0x85: +	test:  		emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);  		break;  	case 0x86 ... 0x87:	/* xchg */ @@ -2735,18 +2817,13 @@ special_insn:  		break;  	case 0x88 ... 0x8b:	/* mov */  		goto mov; -	case 0x8c: { /* mov r/m, sreg */ -		struct kvm_segment segreg; - -		if (c->modrm_reg <= VCPU_SREG_GS) -			kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg); -		else { -			kvm_queue_exception(ctxt->vcpu, UD_VECTOR); +	case 0x8c:  /* mov r/m, sreg */ +		if (c->modrm_reg > VCPU_SREG_GS) { +			emulate_ud(ctxt);  			goto done;  		} -		c->dst.val = segreg.selector; +		c->dst.val = ops->get_segment_selector(c->modrm_reg, ctxt->vcpu);  		break; -	}  	case 0x8d: /* lea r16/r32, m */  		c->dst.val = c->modrm_ea;  		break; @@ -2757,12 +2834,12 @@ special_insn:  		if (c->modrm_reg == VCPU_SREG_CS ||  		    c->modrm_reg > VCPU_SREG_GS) { -			kvm_queue_exception(ctxt->vcpu, UD_VECTOR); +			emulate_ud(ctxt);  			goto done;  		}  		if (c->modrm_reg == VCPU_SREG_SS) -			toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_MOV_SS); +			ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;  		rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg); @@ -2775,19 +2852,19 @@ special_insn:  			goto done;  		break;  	case 0x90: /* nop / xchg r8,rax */ -		if (!(c->rex_prefix & 1)) { /* nop */ -			c->dst.type = OP_NONE; +		if (c->dst.ptr == (unsigned long *)&c->regs[VCPU_REGS_RAX]) { +			c->dst.type = OP_NONE;  /* nop */  			break;  		}  	case 0x91 ... 0x97: /* xchg reg,rax */ -		c->src.type = c->dst.type = OP_REG; -		c->src.bytes = c->dst.bytes = c->op_bytes; +		c->src.type = OP_REG; +		c->src.bytes = c->op_bytes;  		c->src.ptr = (unsigned long *) &c->regs[VCPU_REGS_RAX];  		c->src.val = *(c->src.ptr);  		goto xchg;  	case 0x9c: /* pushf */  		c->src.val =  (unsigned long) ctxt->eflags; -		emulate_push(ctxt); +		emulate_push(ctxt, ops);  		break;  	case 0x9d: /* popf */  		c->dst.type = OP_REG; @@ -2797,19 +2874,15 @@ special_insn:  		if (rc != X86EMUL_CONTINUE)  			goto done;  		break; -	case 0xa0 ... 0xa1:	/* mov */ -		c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; -		c->dst.val = c->src.val; -		break; -	case 0xa2 ... 0xa3:	/* mov */ -		c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX]; -		break; +	case 0xa0 ... 0xa3:	/* mov */  	case 0xa4 ... 0xa5:	/* movs */  		goto mov;  	case 0xa6 ... 0xa7:	/* cmps */  		c->dst.type = OP_NONE; /* Disable writeback. 
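The 0x90 change above is subtle enough to spell out: 0x90 is only a nop when its implicit xchg operand really is rAX, and a REX.B prefix redirects it to r8, turning the "nop" into xchg %r8,%rax. That is why the new test asks whether decode resolved dst.ptr to the RAX register slot instead of peeking at the prefix byte. A tiny sketch of the decode rule (ignoring the other REX bits, which don't affect nop-ness):

#include <stdbool.h>
#include <stdio.h>

enum { RAX = 0, R8 = 8 };

/* Resolve the register operand of a one-byte 0x90..0x97 xchg. */
static int xchg_reg(unsigned char opcode, bool rex_b)
{
	return (opcode & 7) + (rex_b ? 8 : 0);
}

/* 0x90 only degenerates to NOP when the operand resolves to rAX. */
static bool is_nop(unsigned char opcode, bool rex_b)
{
	return opcode == 0x90 && xchg_reg(opcode, rex_b) == RAX;
}

int main(void)
{
	printf("90         -> %s\n", is_nop(0x90, false) ? "nop" : "xchg");
	printf("REX.B + 90 -> %s\n", is_nop(0x90, true) ? "nop" : "xchg");
	return 0;
}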
*/  		DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr);  		goto cmp; +	case 0xa8 ... 0xa9:	/* test ax, imm */ +		goto test;  	case 0xaa ... 0xab:	/* stos */  		c->dst.val = c->regs[VCPU_REGS_RAX];  		break; @@ -2855,19 +2928,23 @@ special_insn:  		long int rel = c->src.val;  		c->src.val = (unsigned long) c->eip;  		jmp_rel(c, rel); -		emulate_push(ctxt); +		emulate_push(ctxt, ops);  		break;  	}  	case 0xe9: /* jmp rel */  		goto jmp; -	case 0xea: /* jmp far */ +	case 0xea: { /* jmp far */ +		unsigned short sel;  	jump_far: -		if (load_segment_descriptor(ctxt, ops, c->src2.val, -					    VCPU_SREG_CS)) +		memcpy(&sel, c->src.valptr + c->op_bytes, 2); + +		if (load_segment_descriptor(ctxt, ops, sel, VCPU_SREG_CS))  			goto done; -		c->eip = c->src.val; +		c->eip = 0; +		memcpy(&c->eip, c->src.valptr, c->op_bytes);  		break; +	}  	case 0xeb:  	      jmp:		/* jmp rel short */  		jmp_rel(c, c->src.val); @@ -2879,20 +2956,20 @@ special_insn:  	do_io_in:  		c->dst.bytes = min(c->dst.bytes, 4u);  		if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { -			kvm_inject_gp(ctxt->vcpu, 0); +			emulate_gp(ctxt, 0);  			goto done;  		}  		if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val,  				     &c->dst.val))  			goto done; /* IO is needed */  		break; -	case 0xee: /* out al,dx */ -	case 0xef: /* out (e/r)ax,dx */ +	case 0xee: /* out dx,al */ +	case 0xef: /* out dx,(e/r)ax */  		c->src.val = c->regs[VCPU_REGS_RDX];  	do_io_out:  		c->dst.bytes = min(c->dst.bytes, 4u);  		if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { -			kvm_inject_gp(ctxt->vcpu, 0); +			emulate_gp(ctxt, 0);  			goto done;  		}  		ops->pio_out_emulated(c->dst.bytes, c->src.val, &c->dst.val, 1, @@ -2916,18 +2993,20 @@ special_insn:  		c->dst.type = OP_NONE;	/* Disable writeback. */  		break;  	case 0xfa: /* cli */ -		if (emulator_bad_iopl(ctxt, ops)) -			kvm_inject_gp(ctxt->vcpu, 0); -		else { +		if (emulator_bad_iopl(ctxt, ops)) { +			emulate_gp(ctxt, 0); +			goto done; +		} else {  			ctxt->eflags &= ~X86_EFLAGS_IF;  			c->dst.type = OP_NONE;	/* Disable writeback. */  		}  		break;  	case 0xfb: /* sti */ -		if (emulator_bad_iopl(ctxt, ops)) -			kvm_inject_gp(ctxt->vcpu, 0); -		else { -			toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_STI); +		if (emulator_bad_iopl(ctxt, ops)) { +			emulate_gp(ctxt, 0); +			goto done; +		} else { +			ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;  			ctxt->eflags |= X86_EFLAGS_IF;  			c->dst.type = OP_NONE;	/* Disable writeback. */  		} @@ -2964,11 +3043,12 @@ writeback:  	c->dst.type = saved_dst_type;  	if ((c->d & SrcMask) == SrcSI) -		string_addr_inc(ctxt, seg_override_base(ctxt, c), VCPU_REGS_RSI, -				&c->src); +		string_addr_inc(ctxt, seg_override_base(ctxt, ops, c), +				VCPU_REGS_RSI, &c->src);  	if ((c->d & DstMask) == DstDI) -		string_addr_inc(ctxt, es_base(ctxt), VCPU_REGS_RDI, &c->dst); +		string_addr_inc(ctxt, es_base(ctxt, ops), VCPU_REGS_RDI, +				&c->dst);  	if (c->rep_prefix && (c->d & String)) {  		struct read_cache *rc = &ctxt->decode.io_read; @@ -2981,11 +3061,12 @@  		    (rc->end != 0 && rc->end == rc->pos))  			ctxt->restart = false;  	} - -	/* Commit shadow register state. */ -	memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); -	kvm_rip_write(ctxt->vcpu, c->eip); -	ops->set_rflags(ctxt->vcpu, ctxt->eflags); +	/* +	 * reset read cache here in case string instruction is restarted +	 * without decoding +	 */ +	ctxt->decode.mem_read.end = 0; +	ctxt->eip = c->eip;  done:  	return (rc == X86EMUL_UNHANDLEABLE) ? 
-1 : 0; @@ -3051,7 +3132,7 @@ twobyte_insn:  			c->dst.type = OP_NONE;  			break;  		case 5: /* not defined */ -			kvm_queue_exception(ctxt->vcpu, UD_VECTOR); +			emulate_ud(ctxt);  			goto done;  		case 7: /* invlpg*/  			emulate_invlpg(ctxt->vcpu, c->modrm_ea); @@ -3063,7 +3144,7 @@ twobyte_insn:  		}  		break;  	case 0x05: 		/* syscall */ -		rc = emulate_syscall(ctxt); +		rc = emulate_syscall(ctxt, ops);  		if (rc != X86EMUL_CONTINUE)  			goto done;  		else @@ -3073,8 +3154,11 @@ twobyte_insn:  		emulate_clts(ctxt->vcpu);  		c->dst.type = OP_NONE;  		break; -	case 0x08:		/* invd */  	case 0x09:		/* wbinvd */ +		kvm_emulate_wbinvd(ctxt->vcpu); +		c->dst.type = OP_NONE; +		break; +	case 0x08:		/* invd */  	case 0x0d:		/* GrpP (prefetch) */  	case 0x18:		/* Grp16 (prefetch/nop) */  		c->dst.type = OP_NONE; @@ -3084,7 +3168,7 @@ twobyte_insn:  		case 1:  		case 5 ... 7:  		case 9 ... 15: -			kvm_queue_exception(ctxt->vcpu, UD_VECTOR); +			emulate_ud(ctxt);  			goto done;  		}  		c->regs[c->modrm_rm] = ops->get_cr(c->modrm_reg, ctxt->vcpu); @@ -3093,31 +3177,42 @@ twobyte_insn:  	case 0x21: /* mov from dr to reg */  		if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&  		    (c->modrm_reg == 4 || c->modrm_reg == 5)) { -			kvm_queue_exception(ctxt->vcpu, UD_VECTOR); +			emulate_ud(ctxt);  			goto done;  		} -		emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]); +		ops->get_dr(c->modrm_reg, &c->regs[c->modrm_rm], ctxt->vcpu);  		c->dst.type = OP_NONE;	/* no writeback */  		break;  	case 0x22: /* mov reg, cr */ -		ops->set_cr(c->modrm_reg, c->modrm_val, ctxt->vcpu); +		if (ops->set_cr(c->modrm_reg, c->modrm_val, ctxt->vcpu)) { +			emulate_gp(ctxt, 0); +			goto done; +		}  		c->dst.type = OP_NONE;  		break;  	case 0x23: /* mov from reg to dr */  		if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&  		    (c->modrm_reg == 4 || c->modrm_reg == 5)) { -			kvm_queue_exception(ctxt->vcpu, UD_VECTOR); +			emulate_ud(ctxt); +			goto done; +		} + +		if (ops->set_dr(c->modrm_reg, c->regs[c->modrm_rm] & +				((ctxt->mode == X86EMUL_MODE_PROT64) ? 
+				 ~0ULL : ~0U), ctxt->vcpu) < 0) { +			/* #UD condition is already handled by the code above */ +			emulate_gp(ctxt, 0);  			goto done;  		} -		emulator_set_dr(ctxt, c->modrm_reg, c->regs[c->modrm_rm]); +  		c->dst.type = OP_NONE;	/* no writeback */  		break;  	case 0x30:  		/* wrmsr */  		msr_data = (u32)c->regs[VCPU_REGS_RAX]  			| ((u64)c->regs[VCPU_REGS_RDX] << 32); -		if (kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { -			kvm_inject_gp(ctxt->vcpu, 0); +		if (ops->set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { +			emulate_gp(ctxt, 0);  			goto done;  		}  		rc = X86EMUL_CONTINUE; @@ -3125,8 +3220,8 @@ twobyte_insn:  		break;  	case 0x32:  		/* rdmsr */ -		if (kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { -			kvm_inject_gp(ctxt->vcpu, 0); +		if (ops->get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { +			emulate_gp(ctxt, 0);  			goto done;  		} else {  			c->regs[VCPU_REGS_RAX] = (u32)msr_data; @@ -3136,14 +3231,14 @@ twobyte_insn:  		c->dst.type = OP_NONE;  		break;  	case 0x34:		/* sysenter */ -		rc = emulate_sysenter(ctxt); +		rc = emulate_sysenter(ctxt, ops);  		if (rc != X86EMUL_CONTINUE)  			goto done;  		else  			goto writeback;  		break;  	case 0x35:		/* sysexit */ -		rc = emulate_sysexit(ctxt); +		rc = emulate_sysexit(ctxt, ops);  		if (rc != X86EMUL_CONTINUE)  			goto done;  		else @@ -3160,7 +3255,7 @@ twobyte_insn:  		c->dst.type = OP_NONE;  		break;  	case 0xa0:	  /* push fs */ -		emulate_push_sreg(ctxt, VCPU_SREG_FS); +		emulate_push_sreg(ctxt, ops, VCPU_SREG_FS);  		break;  	case 0xa1:	 /* pop fs */  		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); @@ -3179,7 +3274,7 @@ twobyte_insn:  		emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);  		break;  	case 0xa8:	/* push gs */ -		emulate_push_sreg(ctxt, VCPU_SREG_GS); +		emulate_push_sreg(ctxt, ops, VCPU_SREG_GS);  		break;  	case 0xa9:	/* pop gs */  		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 0150affad25d..0fd6378981f4 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -5,6 +5,7 @@   * Copyright (c) 2006 Intel Corporation   * Copyright (c) 2007 Keir Fraser, XenSource Inc   * Copyright (c) 2008 Intel Corporation + * Copyright 2009 Red Hat, Inc. and/or its affiliates.   *   * Permission is hereby granted, free of charge, to any person obtaining a copy   * of this software and associated documentation files (the "Software"), to deal @@ -33,6 +34,7 @@  #include <linux/kvm_host.h>  #include <linux/slab.h> +#include <linux/workqueue.h>  #include "irq.h"  #include "i8254.h" @@ -243,11 +245,22 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)  {  	struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,  						 irq_ack_notifier); -	raw_spin_lock(&ps->inject_lock); -	if (atomic_dec_return(&ps->pit_timer.pending) < 0) +	int value; + +	spin_lock(&ps->inject_lock); +	value = atomic_dec_return(&ps->pit_timer.pending); +	if (value < 0) +		/* spurious acks can be generated if, for example, the +		 * PIC is being reset.  Handle it gracefully here +		 */  		atomic_inc(&ps->pit_timer.pending); +	else if (value > 0) +		/* in this case, we had multiple outstanding pit interrupts +		 * that we needed to inject.  Reinject 
Reinject +		 */ +		queue_work(ps->pit->wq, &ps->pit->expired);  	ps->irq_ack = 1; -	raw_spin_unlock(&ps->inject_lock); +	spin_unlock(&ps->inject_lock);  }  void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) @@ -263,10 +276,10 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)  		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);  } -static void destroy_pit_timer(struct kvm_timer *pt) +static void destroy_pit_timer(struct kvm_pit *pit)  { -	pr_debug("execute del timer!\n"); -	hrtimer_cancel(&pt->timer); +	hrtimer_cancel(&pit->pit_state.pit_timer.timer); +	cancel_work_sync(&pit->expired);  }  static bool kpit_is_periodic(struct kvm_timer *ktimer) @@ -280,6 +293,60 @@ static struct kvm_timer_ops kpit_ops = {  	.is_periodic = kpit_is_periodic,  }; +static void pit_do_work(struct work_struct *work) +{ +	struct kvm_pit *pit = container_of(work, struct kvm_pit, expired); +	struct kvm *kvm = pit->kvm; +	struct kvm_vcpu *vcpu; +	int i; +	struct kvm_kpit_state *ps = &pit->pit_state; +	int inject = 0; + +	/* Try to inject pending interrupts when +	 * last one has been acked. +	 */ +	spin_lock(&ps->inject_lock); +	if (ps->irq_ack) { +		ps->irq_ack = 0; +		inject = 1; +	} +	spin_unlock(&ps->inject_lock); +	if (inject) { +		kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); +		kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); + +		/* +		 * Provides NMI watchdog support via Virtual Wire mode. +		 * The route is: PIT -> PIC -> LVT0 in NMI mode. +		 * +		 * Note: Our Virtual Wire implementation is simplified, only +		 * propagating PIT interrupts to all VCPUs when they have set +		 * LVT0 to NMI delivery. Other PIC interrupts are just sent to +		 * VCPU0, and only if its LVT0 is in EXTINT mode. +		 */ +		if (kvm->arch.vapics_in_nmi_mode > 0) +			kvm_for_each_vcpu(i, vcpu, kvm) +				kvm_apic_nmi_wd_deliver(vcpu); +	} +} + +static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) +{ +	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); +	struct kvm_pit *pt = ktimer->kvm->arch.vpit; + +	if (ktimer->reinject || !atomic_read(&ktimer->pending)) { +		atomic_inc(&ktimer->pending); +		queue_work(pt->wq, &pt->expired); +	} + +	if (ktimer->t_ops->is_periodic(ktimer)) { +		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); +		return HRTIMER_RESTART; +	} else +		return HRTIMER_NORESTART; +} +  static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)  {  	struct kvm_timer *pt = &ps->pit_timer; @@ -291,13 +358,13 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)  	/* TODO The new value only affected after the retriggered */  	hrtimer_cancel(&pt->timer); +	cancel_work_sync(&ps->pit->expired);  	pt->period = interval;  	ps->is_periodic = is_period; -	pt->timer.function = kvm_timer_fn; +	pt->timer.function = pit_timer_fn;  	pt->t_ops = &kpit_ops;  	pt->kvm = ps->pit->kvm; -	pt->vcpu = pt->kvm->bsp_vcpu;  	atomic_set(&pt->pending, 0);  	ps->irq_ack = 1; @@ -346,7 +413,7 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)  		}  		break;  	default: -		destroy_pit_timer(&ps->pit_timer); +		destroy_pit_timer(kvm->arch.vpit);  	}  } @@ -625,7 +692,15 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)  	mutex_init(&pit->pit_state.lock);  	mutex_lock(&pit->pit_state.lock); -	raw_spin_lock_init(&pit->pit_state.inject_lock); +	spin_lock_init(&pit->pit_state.inject_lock); + +	pit->wq = create_singlethread_workqueue("kvm-pit-wq"); +	if (!pit->wq) { +		mutex_unlock(&pit->pit_state.lock); +		kfree(pit); +		return 
NULL;
+	}
+	INIT_WORK(&pit->expired, pit_do_work);
 
 	kvm->arch.vpit = pit;
 	pit->kvm = kvm;
@@ -677,6 +752,9 @@ void kvm_free_pit(struct kvm *kvm)
 	struct hrtimer *timer;
 
 	if (kvm->arch.vpit) {
+		kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &kvm->arch.vpit->dev);
+		kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
+					      &kvm->arch.vpit->speaker_dev);
 		kvm_unregister_irq_mask_notifier(kvm, 0,
 					       &kvm->arch.vpit->mask_notifier);
 		kvm_unregister_irq_ack_notifier(kvm,
@@ -684,54 +762,10 @@ void kvm_free_pit(struct kvm *kvm)
 		mutex_lock(&kvm->arch.vpit->pit_state.lock);
 		timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
 		hrtimer_cancel(timer);
+		cancel_work_sync(&kvm->arch.vpit->expired);
 		kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id);
 		mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+		destroy_workqueue(kvm->arch.vpit->wq);
 		kfree(kvm->arch.vpit);
 	}
 }
-
-static void __inject_pit_timer_intr(struct kvm *kvm)
-{
-	struct kvm_vcpu *vcpu;
-	int i;
-
-	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
-	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
-
-	/*
-	 * Provides NMI watchdog support via Virtual Wire mode.
-	 * The route is: PIT -> PIC -> LVT0 in NMI mode.
-	 *
-	 * Note: Our Virtual Wire implementation is simplified, only
-	 * propagating PIT interrupts to all VCPUs when they have set
-	 * LVT0 to NMI delivery. Other PIC interrupts are just sent to
-	 * VCPU0, and only if its LVT0 is in EXTINT mode.
-	 */
-	if (kvm->arch.vapics_in_nmi_mode > 0)
-		kvm_for_each_vcpu(i, vcpu, kvm)
-			kvm_apic_nmi_wd_deliver(vcpu);
-}
-
-void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
-{
-	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
-	struct kvm *kvm = vcpu->kvm;
-	struct kvm_kpit_state *ps;
-
-	if (pit) {
-		int inject = 0;
-		ps = &pit->pit_state;
-
-		/* Try to inject pending interrupts when
-		 * last one has been acked.
-		 */
-		raw_spin_lock(&ps->inject_lock);
-		if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) {
-			ps->irq_ack = 0;
-			inject = 1;
-		}
-		raw_spin_unlock(&ps->inject_lock);
-		if (inject)
-			__inject_pit_timer_intr(kvm);
-	}
-}
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index 900d6b0ba7c2..46d08ca0b48f 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -27,7 +27,7 @@ struct kvm_kpit_state {
 	u32    speaker_data_on;
 	struct mutex lock;
 	struct kvm_pit *pit;
-	raw_spinlock_t inject_lock;
+	spinlock_t inject_lock;
 	unsigned long irq_ack;
 	struct kvm_irq_ack_notifier irq_ack_notifier;
 };
@@ -40,6 +40,8 @@ struct kvm_pit {
 	struct kvm_kpit_state pit_state;
 	int irq_source_id;
 	struct kvm_irq_mask_notifier mask_notifier;
+	struct workqueue_struct *wq;
+	struct work_struct expired;
 };
 
 #define KVM_PIT_BASE_ADDRESS	    0x40
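The i8254 hunks above move PIT tick injection out of the hrtimer callback and into a dedicated workqueue, which is why teardown now cancels the timer, waits out the work item, and only then destroys the workqueue. A minimal sketch of that deferral pattern follows; the demo_* names are illustrative stand-ins, not the real kvm structures:

	#include <linux/kernel.h>
	#include <linux/hrtimer.h>
	#include <linux/workqueue.h>
	#include <linux/atomic.h>

	struct demo_pit {			/* stand-in for struct kvm_pit */
		struct hrtimer timer;
		struct workqueue_struct *wq;
		struct work_struct expired;
		atomic_t pending;
	};

	static void demo_expired_fn(struct work_struct *work)
	{
		struct demo_pit *p = container_of(work, struct demo_pit, expired);

		/* process context: sleeping locks and vcpu kicks are safe here */
		atomic_dec_if_positive(&p->pending);
	}

	static enum hrtimer_restart demo_timer_fn(struct hrtimer *t)
	{
		struct demo_pit *p = container_of(t, struct demo_pit, timer);

		atomic_inc(&p->pending);
		queue_work(p->wq, &p->expired);	/* defer the heavy work */
		return HRTIMER_NORESTART;
	}

	static void demo_pit_destroy(struct demo_pit *p)
	{
		hrtimer_cancel(&p->timer);	/* stop new work being queued */
		cancel_work_sync(&p->expired);	/* wait for in-flight work */
		destroy_workqueue(p->wq);
	}

The ordering in demo_pit_destroy() mirrors kvm_free_pit() above: once the timer can no longer queue work and pending work has drained, destroying the workqueue is safe.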
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 93825ff3338f..8d10c063d7f2 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -3,6 +3,7 @@
  *
  * Copyright (c) 2003-2004 Fabrice Bellard
  * Copyright (c) 2007 Intel Corporation
+ * Copyright 2009 Red Hat, Inc. and/or its affiliates.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -33,6 +34,8 @@
 #include <linux/kvm_host.h>
 #include "trace.h"
 
+static void pic_irq_request(struct kvm *kvm, int level);
+
 static void pic_lock(struct kvm_pic *s)
 	__acquires(&s->lock)
 {
@@ -43,16 +46,25 @@ static void pic_unlock(struct kvm_pic *s)
 	__releases(&s->lock)
 {
 	bool wakeup = s->wakeup_needed;
-	struct kvm_vcpu *vcpu;
+	struct kvm_vcpu *vcpu, *found = NULL;
+	int i;
 
 	s->wakeup_needed = false;
 
 	raw_spin_unlock(&s->lock);
 
 	if (wakeup) {
-		vcpu = s->kvm->bsp_vcpu;
-		if (vcpu)
-			kvm_vcpu_kick(vcpu);
+		kvm_for_each_vcpu(i, vcpu, s->kvm) {
+			if (kvm_apic_accept_pic_intr(vcpu)) {
+				found = vcpu;
+				break;
+			}
+		}
+
+		if (!found)
+			found = s->kvm->bsp_vcpu;
+
+		kvm_vcpu_kick(found);
 	}
 }
 
@@ -173,10 +185,7 @@ static void pic_update_irq(struct kvm_pic *s)
 		pic_set_irq1(&s->pics[0], 2, 0);
 	}
 	irq = pic_get_irq(&s->pics[0]);
-	if (irq >= 0)
-		s->irq_request(s->irq_request_opaque, 1);
-	else
-		s->irq_request(s->irq_request_opaque, 0);
+	pic_irq_request(s->kvm, irq >= 0);
 }
 
 void kvm_pic_update_irq(struct kvm_pic *s)
@@ -261,8 +270,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
 void kvm_pic_reset(struct kvm_kpic_state *s)
 {
 	int irq;
-	struct kvm *kvm = s->pics_state->irq_request_opaque;
-	struct kvm_vcpu *vcpu0 = kvm->bsp_vcpu;
+	struct kvm_vcpu *vcpu0 = s->pics_state->kvm->bsp_vcpu;
 	u8 irr = s->irr, isr = s->imr;
 
 	s->last_irr = 0;
@@ -301,8 +309,7 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
 			/*
 			 * deassert a pending interrupt
 			 */
-			s->pics_state->irq_request(s->pics_state->
-						   irq_request_opaque, 0);
+			pic_irq_request(s->pics_state->kvm, 0);
 			s->init_state = 1;
 			s->init4 = val & 1;
 			if (val & 0x02)
@@ -356,10 +363,20 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
 		}
 	} else
 		switch (s->init_state) {
-		case 0:		/* normal mode */
+		case 0: { /* normal mode */
+			u8 imr_diff = s->imr ^ val,
+				off = (s == &s->pics_state->pics[0]) ? 0 : 8;
 			s->imr = val;
+			for (irq = 0; irq < PIC_NUM_PINS/2; irq++)
+				if (imr_diff & (1 << irq))
+					kvm_fire_mask_notifiers(
+						s->pics_state->kvm,
+						SELECT_PIC(irq + off),
+						irq + off,
+						!!(s->imr & (1 << irq)));
 			pic_update_irq(s->pics_state);
 			break;
+		}
 		case 1:
 			s->irq_base = val & 0xf8;
 			s->init_state = 2;
@@ -518,9 +535,8 @@ static int picdev_read(struct kvm_io_device *this,
 /*
  * callback when PIC0 irq status changed
  */
-static void pic_irq_request(void *opaque, int level)
+static void pic_irq_request(struct kvm *kvm, int level)
 {
-	struct kvm *kvm = opaque;
 	struct kvm_vcpu *vcpu = kvm->bsp_vcpu;
 	struct kvm_pic *s = pic_irqchip(kvm);
 	int irq = pic_get_irq(&s->pics[0]);
@@ -549,8 +565,6 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
 	s->kvm = kvm;
 	s->pics[0].elcr_mask = 0xf8;
 	s->pics[1].elcr_mask = 0xde;
-	s->irq_request = pic_irq_request;
-	s->irq_request_opaque = kvm;
 	s->pics[0].pics_state = s;
 	s->pics[1].pics_state = s;
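The new "case 0" block above only fires mask notifiers for interrupt lines whose mask bit actually changed, by XOR-ing the old and new IMR values. The same changed-bit walk in isolation, with illustrative names rather than the kvm API:

	#include <stdint.h>

	#define PINS_PER_PIC 8

	static void fire_mask_notifier(int irq, int masked)
	{
		/* in kvm this is kvm_fire_mask_notifiers(); here just a stub */
		(void)irq; (void)masked;
	}

	/* off is 0 for the master PIC, 8 for the slave */
	static void demo_imr_write(uint8_t *imr, uint8_t val, int off)
	{
		uint8_t diff = *imr ^ val;	/* bits that changed state */
		int irq;

		*imr = val;
		for (irq = 0; irq < PINS_PER_PIC; irq++)
			if (diff & (1u << irq))
				fire_mask_notifier(irq + off,
						   !!(val & (1u << irq)));
	}

Notifying only on transitions keeps a guest that rewrites an unchanged IMR from spamming every registered mask notifier.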
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 96dfbb6ad2a9..2095a049835e 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -1,6 +1,7 @@
 /*
  * irq.c: API for in kernel interrupt controller
  * Copyright (c) 2007, Intel Corporation.
+ * Copyright 2009 Red Hat, Inc. and/or its affiliates.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -89,7 +90,6 @@ EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
 {
 	kvm_inject_apic_timer_irqs(vcpu);
-	kvm_inject_pit_timer_irqs(vcpu);
 	/* TODO: PIT, RTC etc. */
 }
 EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index cd1f362f413d..ffed06871c5c 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -38,8 +38,6 @@
 struct kvm;
 struct kvm_vcpu;
 
-typedef void irq_request_func(void *opaque, int level);
-
 struct kvm_kpic_state {
 	u8 last_irr;	/* edge detection */
 	u8 irr;		/* interrupt request register */
@@ -67,8 +65,6 @@
 	unsigned pending_acks;
 	struct kvm *kvm;
 	struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
-	irq_request_func *irq_request;
-	void *irq_request_opaque;
 	int output;		/* intr from master PIC */
 	struct kvm_io_device dev;
 	void (*ack_notifier)(void *opaque, int irq);
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index cff851cf5322..6491ac8e755b 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -36,6 +36,8 @@ static inline void kvm_rip_write(struct kvm_vcpu *vcpu, unsigned long val)
 
 static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
 {
+	might_sleep();  /* on svm */
+
 	if (!test_bit(VCPU_EXREG_PDPTR,
 		      (unsigned long *)&vcpu->arch.regs_avail))
 		kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR);
@@ -69,4 +71,10 @@ static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu)
 	return kvm_read_cr4_bits(vcpu, ~0UL);
 }
 
+static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
+{
+	return (kvm_register_read(vcpu, VCPU_REGS_RAX) & -1u)
+		| ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32);
+}
+
 #endif
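kvm_read_edx_eax() above packs the guest's edx:eax pair into one 64-bit value, the layout used by rdmsr/rdtsc-style instructions; the "& -1u" masks each register down to its low 32 bits. The same arithmetic with plain integers (the helper name is made up for illustration):

	#include <stdint.h>

	static inline uint64_t edx_eax_to_u64(uint64_t rax, uint64_t rdx)
	{
		/* low half from eax, high half from edx */
		return (rax & 0xffffffffu) | ((uint64_t)(rdx & 0xffffffffu) << 32);
	}

	/* e.g. rax = 0x11223344, rdx = 0xaabbccdd yields 0xaabbccdd11223344 */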
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 1eb7a4ae0c9c..77d8c0f4817d 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -5,6 +5,7 @@
  * Copyright (C) 2006 Qumranet, Inc.
  * Copyright (C) 2007 Novell
  * Copyright (C) 2007 Intel
+ * Copyright 2009 Red Hat, Inc. and/or its affiliates.
  *
  * Authors:
  *   Dor Laor <dor.laor@qumranet.com>
@@ -328,7 +329,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 		   "dest_mode 0x%x, short_hand 0x%x\n",
 		   target, source, dest, dest_mode, short_hand);
 
-	ASSERT(!target);
+	ASSERT(target);
 	switch (short_hand) {
 	case APIC_DEST_NOSHORT:
 		if (dest_mode == 0)
@@ -533,7 +534,7 @@ static void __report_tpr_access(struct kvm_lapic *apic, bool write)
 	struct kvm_vcpu *vcpu = apic->vcpu;
 	struct kvm_run *run = vcpu->run;
 
-	set_bit(KVM_REQ_REPORT_TPR_ACCESS, &vcpu->requests);
+	kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
 	run->tpr_access.rip = kvm_rip_read(vcpu);
 	run->tpr_access.is_write = write;
 }
@@ -1106,13 +1107,11 @@ int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
 	u32 lvt0 = apic_get_reg(vcpu->arch.apic, APIC_LVT0);
 	int r = 0;
 
-	if (kvm_vcpu_is_bsp(vcpu)) {
-		if (!apic_hw_enabled(vcpu->arch.apic))
-			r = 1;
-		if ((lvt0 & APIC_LVT_MASKED) == 0 &&
-		    GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
-			r = 1;
-	}
+	if (!apic_hw_enabled(vcpu->arch.apic))
+		r = 1;
+	if ((lvt0 & APIC_LVT_MASKED) == 0 &&
+	    GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
+		r = 1;
 	return r;
 }
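The __report_tpr_access() hunk swaps an open-coded set_bit() on vcpu->requests for kvm_make_request(). The request word is just a per-vcpu bitmask that the run loop tests and clears before entering the guest; a simplified, non-atomic sketch of that pattern, with invented demo_* names:

	enum demo_req { DEMO_REQ_TLB_FLUSH, DEMO_REQ_REPORT_TPR, DEMO_REQ_TRIPLE_FAULT };

	struct demo_vcpu {
		unsigned long requests;		/* one bit per pending request */
	};

	static inline void demo_make_request(int req, struct demo_vcpu *v)
	{
		v->requests |= 1UL << req;	/* kvm uses an atomic set_bit() */
	}

	static inline int demo_check_request(int req, struct demo_vcpu *v)
	{
		if (!(v->requests & (1UL << req)))
			return 0;
		v->requests &= ~(1UL << req);	/* kvm: test_and_clear_bit() */
		return 1;
	}

Routing every caller through one helper is what later lets the flag operations be audited or instrumented in a single place.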
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b1ed0a1a5913..311f6dad8951 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -7,6 +7,7 @@
  * MMU support
  *
  * Copyright (C) 2006 Qumranet, Inc.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
  *
  * Authors:
  *   Yaniv Kamay  <yaniv@qumranet.com>
@@ -32,6 +33,7 @@
 #include <linux/compiler.h>
 #include <linux/srcu.h>
 #include <linux/slab.h>
+#include <linux/uaccess.h>
 
 #include <asm/page.h>
 #include <asm/cmpxchg.h>
@@ -90,8 +92,6 @@ module_param(oos_shadow, bool, 0644);
 #define PT_FIRST_AVAIL_BITS_SHIFT 9
 #define PT64_SECOND_AVAIL_BITS_SHIFT 52
 
-#define VALID_PAGE(x) ((x) != INVALID_PAGE)
-
 #define PT64_LEVEL_BITS 9
 
 #define PT64_LEVEL_SHIFT(level) \
@@ -173,7 +173,7 @@ struct kvm_shadow_walk_iterator {
 	     shadow_walk_okay(&(_walker));			\
 	     shadow_walk_next(&(_walker)))
 
-typedef int (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp);
+typedef void (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp, u64 *spte);
 
 static struct kmem_cache *pte_chain_cache;
 static struct kmem_cache *rmap_desc_cache;
@@ -281,13 +281,38 @@ static gfn_t pse36_gfn_delta(u32 gpte)
 
 static void __set_spte(u64 *sptep, u64 spte)
 {
+	set_64bit(sptep, spte);
+}
+
+static u64 __xchg_spte(u64 *sptep, u64 new_spte)
+{
 #ifdef CONFIG_X86_64
-	set_64bit((unsigned long *)sptep, spte);
+	return xchg(sptep, new_spte);
 #else
-	set_64bit((unsigned long long *)sptep, spte);
+	u64 old_spte;
+
+	do {
+		old_spte = *sptep;
+	} while (cmpxchg64(sptep, old_spte, new_spte) != old_spte);
+
+	return old_spte;
 #endif
 }
 
+static void update_spte(u64 *sptep, u64 new_spte)
+{
+	u64 old_spte;
+
+	if (!shadow_accessed_mask || (new_spte & shadow_accessed_mask) ||
+	      !is_rmap_spte(*sptep))
+		__set_spte(sptep, new_spte);
+	else {
+		old_spte = __xchg_spte(sptep, new_spte);
+		if (old_spte & shadow_accessed_mask)
+			mark_page_accessed(pfn_to_page(spte_to_pfn(old_spte)));
+	}
+}
+
 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
 				  struct kmem_cache *base_cache, int min)
 {
@@ -304,10 +329,11 @@ static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
 	return 0;
 }
 
-static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
+static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc,
+				  struct kmem_cache *cache)
 {
 	while (mc->nobjs)
-		kfree(mc->objects[--mc->nobjs]);
+		kmem_cache_free(cache, mc->objects[--mc->nobjs]);
 }
 
 static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache,
@@ -355,10 +381,11 @@ out:
 
 static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
 {
-	mmu_free_memory_cache(&vcpu->arch.mmu_pte_chain_cache);
-	mmu_free_memory_cache(&vcpu->arch.mmu_rmap_desc_cache);
+	mmu_free_memory_cache(&vcpu->arch.mmu_pte_chain_cache, pte_chain_cache);
+	mmu_free_memory_cache(&vcpu->arch.mmu_rmap_desc_cache, rmap_desc_cache);
 	mmu_free_memory_cache_page(&vcpu->arch.mmu_page_cache);
-	mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache);
+	mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache,
+				mmu_page_header_cache);
 }
 
 static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
@@ -379,7 +406,7 @@ static struct kvm_pte_chain *mmu_alloc_pte_chain(struct kvm_vcpu *vcpu)
 
 static void mmu_free_pte_chain(struct kvm_pte_chain *pc)
 {
-	kfree(pc);
+	kmem_cache_free(pte_chain_cache, pc);
 }
 
 static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu)
@@ -390,7 +417,23 @@ static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu)
 
 static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd)
 {
-	kfree(rd);
+	kmem_cache_free(rmap_desc_cache, rd);
+}
+
+static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
+{
+	if (!sp->role.direct)
+		return sp->gfns[index];
+
+	return sp->gfn + (index << ((sp->role.level - 1) * PT64_LEVEL_BITS));
+}
+
+static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn)
+{
+	if (sp->role.direct)
+		BUG_ON(gfn != kvm_mmu_page_get_gfn(sp, index));
+	else
+		sp->gfns[index] = gfn;
 }
 
 /*
@@ -403,8 +446,8 @@ static int *slot_largepage_idx(gfn_t gfn,
 {
 	unsigned long idx;
 
-	idx = (gfn / KVM_PAGES_PER_HPAGE(level)) -
-	      (slot->base_gfn / KVM_PAGES_PER_HPAGE(level));
+	idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
+	      (slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
 	return &slot->lpage_info[level - 2][idx].write_count;
 }
 
@@ -414,9 +457,7 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn)
 	int *write_count;
 	int i;
 
-	gfn = unalias_gfn(kvm, gfn);
-
-	slot = gfn_to_memslot_unaliased(kvm, gfn);
+	slot = gfn_to_memslot(kvm, gfn);
 	for (i = PT_DIRECTORY_LEVEL;
 	     i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
 		write_count   = slot_largepage_idx(gfn, slot, i);
@@ -430,8 +471,7 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
 	int *write_count;
 	int i;
 
-	gfn = unalias_gfn(kvm, gfn);
-	slot = gfn_to_memslot_unaliased(kvm, gfn);
+	slot = gfn_to_memslot(kvm, gfn);
 	for (i = PT_DIRECTORY_LEVEL;
 	     i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
 		write_count   = slot_largepage_idx(gfn, slot, i);
@@ -447,8 +487,7 @@ static int has_wrprotected_page(struct kvm *kvm,
 	struct kvm_memory_slot *slot;
 	int *largepage_idx;
 
-	gfn = unalias_gfn(kvm, gfn);
-	slot = gfn_to_memslot_unaliased(kvm, gfn);
+	slot = gfn_to_memslot(kvm, gfn);
 	if (slot) {
 		largepage_idx = slot_largepage_idx(gfn, slot, level);
 		return *largepage_idx;
@@ -501,7 +540,6 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
 
 /*
  * Take gfn and return the reverse mapping to it.
- * Note: gfn must be unaliased before this function get called   */  static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level) @@ -513,8 +551,8 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level)  	if (likely(level == PT_PAGE_TABLE_LEVEL))  		return &slot->rmap[gfn - slot->base_gfn]; -	idx = (gfn / KVM_PAGES_PER_HPAGE(level)) - -		(slot->base_gfn / KVM_PAGES_PER_HPAGE(level)); +	idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - +		(slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level));  	return &slot->lpage_info[level - 2][idx].rmap_pde;  } @@ -541,9 +579,8 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)  	if (!is_rmap_spte(*spte))  		return count; -	gfn = unalias_gfn(vcpu->kvm, gfn);  	sp = page_header(__pa(spte)); -	sp->gfns[spte - sp->spt] = gfn; +	kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn);  	rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);  	if (!*rmapp) {  		rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte); @@ -600,19 +637,13 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)  	struct kvm_rmap_desc *desc;  	struct kvm_rmap_desc *prev_desc;  	struct kvm_mmu_page *sp; -	pfn_t pfn; +	gfn_t gfn;  	unsigned long *rmapp;  	int i; -	if (!is_rmap_spte(*spte)) -		return;  	sp = page_header(__pa(spte)); -	pfn = spte_to_pfn(*spte); -	if (*spte & shadow_accessed_mask) -		kvm_set_pfn_accessed(pfn); -	if (is_writable_pte(*spte)) -		kvm_set_pfn_dirty(pfn); -	rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level); +	gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt); +	rmapp = gfn_to_rmap(kvm, gfn, sp->role.level);  	if (!*rmapp) {  		printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);  		BUG(); @@ -644,6 +675,32 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)  	}  } +static void set_spte_track_bits(u64 *sptep, u64 new_spte) +{ +	pfn_t pfn; +	u64 old_spte = *sptep; + +	if (!shadow_accessed_mask || !is_shadow_present_pte(old_spte) || +	      old_spte & shadow_accessed_mask) { +		__set_spte(sptep, new_spte); +	} else +		old_spte = __xchg_spte(sptep, new_spte); + +	if (!is_rmap_spte(old_spte)) +		return; +	pfn = spte_to_pfn(old_spte); +	if (!shadow_accessed_mask || old_spte & shadow_accessed_mask) +		kvm_set_pfn_accessed(pfn); +	if (is_writable_pte(old_spte)) +		kvm_set_pfn_dirty(pfn); +} + +static void drop_spte(struct kvm *kvm, u64 *sptep, u64 new_spte) +{ +	set_spte_track_bits(sptep, new_spte); +	rmap_remove(kvm, sptep); +} +  static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)  {  	struct kvm_rmap_desc *desc; @@ -676,7 +733,6 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)  	u64 *spte;  	int i, write_protected = 0; -	gfn = unalias_gfn(kvm, gfn);  	rmapp = gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL);  	spte = rmap_next(kvm, rmapp, NULL); @@ -685,7 +741,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)  		BUG_ON(!(*spte & PT_PRESENT_MASK));  		rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);  		if (is_writable_pte(*spte)) { -			__set_spte(spte, *spte & ~PT_WRITABLE_MASK); +			update_spte(spte, *spte & ~PT_WRITABLE_MASK);  			write_protected = 1;  		}  		spte = rmap_next(kvm, rmapp, spte); @@ -709,9 +765,9 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)  			BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK));  			pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn);  			if (is_writable_pte(*spte)) { -				rmap_remove(kvm, spte); +				drop_spte(kvm, spte, +					  
shadow_trap_nonpresent_pte);  				--kvm->stat.lpages; -				__set_spte(spte, shadow_trap_nonpresent_pte);  				spte = NULL;  				write_protected = 1;  			} @@ -731,8 +787,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,  	while ((spte = rmap_next(kvm, rmapp, NULL))) {  		BUG_ON(!(*spte & PT_PRESENT_MASK));  		rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); -		rmap_remove(kvm, spte); -		__set_spte(spte, shadow_trap_nonpresent_pte); +		drop_spte(kvm, spte, shadow_trap_nonpresent_pte);  		need_tlb_flush = 1;  	}  	return need_tlb_flush; @@ -754,8 +809,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,  		rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte);  		need_flush = 1;  		if (pte_write(*ptep)) { -			rmap_remove(kvm, spte); -			__set_spte(spte, shadow_trap_nonpresent_pte); +			drop_spte(kvm, spte, shadow_trap_nonpresent_pte);  			spte = rmap_next(kvm, rmapp, NULL);  		} else {  			new_spte = *spte &~ (PT64_BASE_ADDR_MASK); @@ -763,9 +817,8 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,  			new_spte &= ~PT_WRITABLE_MASK;  			new_spte &= ~SPTE_HOST_WRITEABLE; -			if (is_writable_pte(*spte)) -				kvm_set_pfn_dirty(spte_to_pfn(*spte)); -			__set_spte(spte, new_spte); +			new_spte &= ~shadow_accessed_mask; +			set_spte_track_bits(spte, new_spte);  			spte = rmap_next(kvm, rmapp, spte);  		}  	} @@ -799,8 +852,12 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,  			ret = handler(kvm, &memslot->rmap[gfn_offset], data);  			for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { -				int idx = gfn_offset; -				idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j); +				unsigned long idx; +				int sh; + +				sh = KVM_HPAGE_GFN_SHIFT(PT_DIRECTORY_LEVEL+j); +				idx = ((memslot->base_gfn+gfn_offset) >> sh) - +					(memslot->base_gfn >> sh);  				ret |= handler(kvm,  					&memslot->lpage_info[j][idx].rmap_pde,  					data); @@ -863,7 +920,6 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)  	sp = page_header(__pa(spte)); -	gfn = unalias_gfn(vcpu->kvm, gfn);  	rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);  	kvm_unmap_rmapp(vcpu->kvm, rmapp, 0); @@ -894,10 +950,12 @@ static int is_empty_shadow_page(u64 *spt)  static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp)  {  	ASSERT(is_empty_shadow_page(sp->spt)); +	hlist_del(&sp->hash_link);  	list_del(&sp->link);  	__free_page(virt_to_page(sp->spt)); -	__free_page(virt_to_page(sp->gfns)); -	kfree(sp); +	if (!sp->role.direct) +		__free_page(virt_to_page(sp->gfns)); +	kmem_cache_free(mmu_page_header_cache, sp);  	++kvm->arch.n_free_mmu_pages;  } @@ -907,13 +965,15 @@ static unsigned kvm_page_table_hashfn(gfn_t gfn)  }  static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, -					       u64 *parent_pte) +					       u64 *parent_pte, int direct)  {  	struct kvm_mmu_page *sp;  	sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache, sizeof *sp);  	sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); -	sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); +	if (!direct) +		sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, +						  PAGE_SIZE);  	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);  	list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);  	bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); @@ -998,7 +1058,6 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp,  	BUG();  } -  static void 
mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn)  {  	struct kvm_pte_chain *pte_chain; @@ -1008,63 +1067,37 @@ static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn)  	if (!sp->multimapped && sp->parent_pte) {  		parent_sp = page_header(__pa(sp->parent_pte)); -		fn(parent_sp); -		mmu_parent_walk(parent_sp, fn); +		fn(parent_sp, sp->parent_pte);  		return;  	} +  	hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link)  		for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) { -			if (!pte_chain->parent_ptes[i]) +			u64 *spte = pte_chain->parent_ptes[i]; + +			if (!spte)  				break; -			parent_sp = page_header(__pa(pte_chain->parent_ptes[i])); -			fn(parent_sp); -			mmu_parent_walk(parent_sp, fn); +			parent_sp = page_header(__pa(spte)); +			fn(parent_sp, spte);  		}  } -static void kvm_mmu_update_unsync_bitmap(u64 *spte) +static void mark_unsync(struct kvm_mmu_page *sp, u64 *spte); +static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp)  { -	unsigned int index; -	struct kvm_mmu_page *sp = page_header(__pa(spte)); - -	index = spte - sp->spt; -	if (!__test_and_set_bit(index, sp->unsync_child_bitmap)) -		sp->unsync_children++; -	WARN_ON(!sp->unsync_children); +	mmu_parent_walk(sp, mark_unsync);  } -static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) +static void mark_unsync(struct kvm_mmu_page *sp, u64 *spte)  { -	struct kvm_pte_chain *pte_chain; -	struct hlist_node *node; -	int i; +	unsigned int index; -	if (!sp->parent_pte) +	index = spte - sp->spt; +	if (__test_and_set_bit(index, sp->unsync_child_bitmap))  		return; - -	if (!sp->multimapped) { -		kvm_mmu_update_unsync_bitmap(sp->parent_pte); +	if (sp->unsync_children++)  		return; -	} - -	hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) -		for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) { -			if (!pte_chain->parent_ptes[i]) -				break; -			kvm_mmu_update_unsync_bitmap(pte_chain->parent_ptes[i]); -		} -} - -static int unsync_walk_fn(struct kvm_mmu_page *sp) -{ -	kvm_mmu_update_parents_unsync(sp); -	return 1; -} - -static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) -{ -	mmu_parent_walk(sp, unsync_walk_fn); -	kvm_mmu_update_parents_unsync(sp); +	kvm_mmu_mark_parents_unsync(sp);  }  static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, @@ -1077,7 +1110,7 @@ static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu,  }  static int nonpaging_sync_page(struct kvm_vcpu *vcpu, -			       struct kvm_mmu_page *sp) +			       struct kvm_mmu_page *sp, bool clear_unsync)  {  	return 1;  } @@ -1123,35 +1156,40 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,  	int i, ret, nr_unsync_leaf = 0;  	for_each_unsync_children(sp->unsync_child_bitmap, i) { +		struct kvm_mmu_page *child;  		u64 ent = sp->spt[i]; -		if (is_shadow_present_pte(ent) && !is_large_pte(ent)) { -			struct kvm_mmu_page *child; -			child = page_header(ent & PT64_BASE_ADDR_MASK); - -			if (child->unsync_children) { -				if (mmu_pages_add(pvec, child, i)) -					return -ENOSPC; - -				ret = __mmu_unsync_walk(child, pvec); -				if (!ret) -					__clear_bit(i, sp->unsync_child_bitmap); -				else if (ret > 0) -					nr_unsync_leaf += ret; -				else -					return ret; -			} +		if (!is_shadow_present_pte(ent) || is_large_pte(ent)) +			goto clear_child_bitmap; + +		child = page_header(ent & PT64_BASE_ADDR_MASK); + +		if (child->unsync_children) { +			if (mmu_pages_add(pvec, child, i)) +				return -ENOSPC; + +			ret = __mmu_unsync_walk(child, pvec); +			if (!ret) +				goto clear_child_bitmap; +			else if (ret > 0) 
+				nr_unsync_leaf += ret; +			else +				return ret; +		} else if (child->unsync) { +			nr_unsync_leaf++; +			if (mmu_pages_add(pvec, child, i)) +				return -ENOSPC; +		} else +			 goto clear_child_bitmap; -			if (child->unsync) { -				nr_unsync_leaf++; -				if (mmu_pages_add(pvec, child, i)) -					return -ENOSPC; -			} -		} +		continue; + +clear_child_bitmap: +		__clear_bit(i, sp->unsync_child_bitmap); +		sp->unsync_children--; +		WARN_ON((int)sp->unsync_children < 0);  	} -	if (find_first_bit(sp->unsync_child_bitmap, 512) == 512) -		sp->unsync_children = 0;  	return nr_unsync_leaf;  } @@ -1166,26 +1204,6 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp,  	return __mmu_unsync_walk(sp, pvec);  } -static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) -{ -	unsigned index; -	struct hlist_head *bucket; -	struct kvm_mmu_page *sp; -	struct hlist_node *node; - -	pgprintk("%s: looking for gfn %lx\n", __func__, gfn); -	index = kvm_page_table_hashfn(gfn); -	bucket = &kvm->arch.mmu_page_hash[index]; -	hlist_for_each_entry(sp, node, bucket, hash_link) -		if (sp->gfn == gfn && !sp->role.direct -		    && !sp->role.invalid) { -			pgprintk("%s: found role %x\n", -				 __func__, sp->role.word); -			return sp; -		} -	return NULL; -} -  static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)  {  	WARN_ON(!sp->unsync); @@ -1194,20 +1212,36 @@ static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)  	--kvm->stat.mmu_unsync;  } -static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp); +static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, +				    struct list_head *invalid_list); +static void kvm_mmu_commit_zap_page(struct kvm *kvm, +				    struct list_head *invalid_list); -static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) +#define for_each_gfn_sp(kvm, sp, gfn, pos)				\ +  hlist_for_each_entry(sp, pos,						\ +   &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link)	\ +	if ((sp)->gfn != (gfn)) {} else + +#define for_each_gfn_indirect_valid_sp(kvm, sp, gfn, pos)		\ +  hlist_for_each_entry(sp, pos,						\ +   &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link)	\ +		if ((sp)->gfn != (gfn) || (sp)->role.direct ||		\ +			(sp)->role.invalid) {} else + +/* @sp->gfn should be write-protected at the call site */ +static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, +			   struct list_head *invalid_list, bool clear_unsync)  {  	if (sp->role.cr4_pae != !!is_pae(vcpu)) { -		kvm_mmu_zap_page(vcpu->kvm, sp); +		kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);  		return 1;  	} -	if (rmap_write_protect(vcpu->kvm, sp->gfn)) -		kvm_flush_remote_tlbs(vcpu->kvm); -	kvm_unlink_unsync_page(vcpu->kvm, sp); -	if (vcpu->arch.mmu.sync_page(vcpu, sp)) { -		kvm_mmu_zap_page(vcpu->kvm, sp); +	if (clear_unsync) +		kvm_unlink_unsync_page(vcpu->kvm, sp); + +	if (vcpu->arch.mmu.sync_page(vcpu, sp, clear_unsync)) { +		kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);  		return 1;  	} @@ -1215,6 +1249,52 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)  	return 0;  } +static int kvm_sync_page_transient(struct kvm_vcpu *vcpu, +				   struct kvm_mmu_page *sp) +{ +	LIST_HEAD(invalid_list); +	int ret; + +	ret = __kvm_sync_page(vcpu, sp, &invalid_list, false); +	if (ret) +		kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); + +	return ret; +} + +static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, +			 struct 
list_head *invalid_list) +{ +	return __kvm_sync_page(vcpu, sp, invalid_list, true); +} + +/* @gfn should be write-protected at the call site */ +static void kvm_sync_pages(struct kvm_vcpu *vcpu,  gfn_t gfn) +{ +	struct kvm_mmu_page *s; +	struct hlist_node *node; +	LIST_HEAD(invalid_list); +	bool flush = false; + +	for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { +		if (!s->unsync) +			continue; + +		WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); +		if ((s->role.cr4_pae != !!is_pae(vcpu)) || +			(vcpu->arch.mmu.sync_page(vcpu, s, true))) { +			kvm_mmu_prepare_zap_page(vcpu->kvm, s, &invalid_list); +			continue; +		} +		kvm_unlink_unsync_page(vcpu->kvm, s); +		flush = true; +	} + +	kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); +	if (flush) +		kvm_mmu_flush_tlb(vcpu); +} +  struct mmu_page_path {  	struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1];  	unsigned int idx[PT64_ROOT_LEVEL-1]; @@ -1281,6 +1361,7 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,  	struct kvm_mmu_page *sp;  	struct mmu_page_path parents;  	struct kvm_mmu_pages pages; +	LIST_HEAD(invalid_list);  	kvm_mmu_pages_init(parent, &parents, &pages);  	while (mmu_unsync_walk(parent, &pages)) { @@ -1293,9 +1374,10 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,  			kvm_flush_remote_tlbs(vcpu->kvm);  		for_each_sp(pages, sp, parents, i) { -			kvm_sync_page(vcpu, sp); +			kvm_sync_page(vcpu, sp, &invalid_list);  			mmu_pages_clear_parents(&parents);  		} +		kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);  		cond_resched_lock(&vcpu->kvm->mmu_lock);  		kvm_mmu_pages_init(parent, &parents, &pages);  	} @@ -1310,11 +1392,10 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,  					     u64 *parent_pte)  {  	union kvm_mmu_page_role role; -	unsigned index;  	unsigned quadrant; -	struct hlist_head *bucket;  	struct kvm_mmu_page *sp; -	struct hlist_node *node, *tmp; +	struct hlist_node *node; +	bool need_sync = false;  	role = vcpu->arch.mmu.base_role;  	role.level = level; @@ -1322,40 +1403,45 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,  	if (role.direct)  		role.cr4_pae = 0;  	role.access = access; -	if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { +	if (!tdp_enabled && vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) {  		quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));  		quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;  		role.quadrant = quadrant;  	} -	index = kvm_page_table_hashfn(gfn); -	bucket = &vcpu->kvm->arch.mmu_page_hash[index]; -	hlist_for_each_entry_safe(sp, node, tmp, bucket, hash_link) -		if (sp->gfn == gfn) { -			if (sp->unsync) -				if (kvm_sync_page(vcpu, sp)) -					continue; +	for_each_gfn_sp(vcpu->kvm, sp, gfn, node) { +		if (!need_sync && sp->unsync) +			need_sync = true; -			if (sp->role.word != role.word) -				continue; +		if (sp->role.word != role.word) +			continue; -			mmu_page_add_parent_pte(vcpu, sp, parent_pte); -			if (sp->unsync_children) { -				set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests); -				kvm_mmu_mark_parents_unsync(sp); -			} -			trace_kvm_mmu_get_page(sp, false); -			return sp; -		} +		if (sp->unsync && kvm_sync_page_transient(vcpu, sp)) +			break; + +		mmu_page_add_parent_pte(vcpu, sp, parent_pte); +		if (sp->unsync_children) { +			kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); +			kvm_mmu_mark_parents_unsync(sp); +		} else if (sp->unsync) +			kvm_mmu_mark_parents_unsync(sp); + +		trace_kvm_mmu_get_page(sp, false); +		return sp; +	}  	++vcpu->kvm->stat.mmu_cache_miss; -	sp = kvm_mmu_alloc_page(vcpu, 
parent_pte);
+	sp = kvm_mmu_alloc_page(vcpu, parent_pte, direct);
 	if (!sp)
 		return sp;
 	sp->gfn = gfn;
 	sp->role = role;
-	hlist_add_head(&sp->hash_link, bucket);
+	hlist_add_head(&sp->hash_link,
+		&vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]);
 	if (!direct) {
 		if (rmap_write_protect(vcpu->kvm, gfn))
 			kvm_flush_remote_tlbs(vcpu->kvm);
+		if (level > PT_PAGE_TABLE_LEVEL && need_sync)
+			kvm_sync_pages(vcpu, gfn);
+
 		account_shadowed(vcpu->kvm, gfn);
 	}
 	if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte)
@@ -1402,6 +1488,47 @@ static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
 	--iterator->level;
 }
 
+static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp)
+{
+	u64 spte;
+
+	spte = __pa(sp->spt)
+		| PT_PRESENT_MASK | PT_ACCESSED_MASK
+		| PT_WRITABLE_MASK | PT_USER_MASK;
+	__set_spte(sptep, spte);
+}
+
+static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
+{
+	if (is_large_pte(*sptep)) {
+		drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte);
+		kvm_flush_remote_tlbs(vcpu->kvm);
+	}
+}
+
+static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep,
+				   unsigned direct_access)
+{
+	if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) {
+		struct kvm_mmu_page *child;
+
+		/*
+		 * For the direct sp, if the guest pte's dirty bit
+		 * changed from clean to dirty, it will corrupt the
+		 * sp's access: allow writable in the read-only sp,
+		 * so we should update the spte at this point to get
+		 * a new sp with the correct access.
+		 */
+		child = page_header(*sptep & PT64_BASE_ADDR_MASK);
+		if (child->role.access == direct_access)
+			return;
+
+		mmu_page_remove_parent_pte(child, sptep);
+		__set_spte(sptep, shadow_trap_nonpresent_pte);
+		kvm_flush_remote_tlbs(vcpu->kvm);
+	}
+}
+
 static void kvm_mmu_page_unlink_children(struct kvm *kvm,
 					 struct kvm_mmu_page *sp)
 {
@@ -1422,7 +1549,8 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm,
 			} else {
 				if (is_large_pte(ent))
 					--kvm->stat.lpages;
-				rmap_remove(kvm, &pt[i]);
+				drop_spte(kvm, &pt[i],
+					  shadow_trap_nonpresent_pte);
 			}
 		}
 		pt[i] = shadow_trap_nonpresent_pte;
@@ -1464,7 +1592,8 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
 }
 
 static int mmu_zap_unsync_children(struct kvm *kvm,
-				   struct kvm_mmu_page *parent)
+				   struct kvm_mmu_page *parent,
+				   struct list_head *invalid_list)
 {
 	int i, zapped = 0;
 	struct mmu_page_path parents;
@@ -1478,7 +1607,7 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
 		struct kvm_mmu_page *sp;
 
 		for_each_sp(pages, sp, parents, i) {
-			kvm_mmu_zap_page(kvm, sp);
+			kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
 			mmu_pages_clear_parents(&parents);
 			zapped++;
 		}
@@ -1488,32 +1617,52 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
 	return zapped;
 }
 
-static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+				    struct list_head *invalid_list)
 {
 	int ret;
 
-	trace_kvm_mmu_zap_page(sp);
+	trace_kvm_mmu_prepare_zap_page(sp);
 	++kvm->stat.mmu_shadow_zapped;
-	ret = mmu_zap_unsync_children(kvm, sp);
+	ret = mmu_zap_unsync_children(kvm, sp, invalid_list);
 	kvm_mmu_page_unlink_children(kvm, sp);
 	kvm_mmu_unlink_parents(kvm, sp);
-	kvm_flush_remote_tlbs(kvm);
 	if (!sp->role.invalid && !sp->role.direct)
 		unaccount_shadowed(kvm, sp->gfn);
 	if (sp->unsync)
 		kvm_unlink_unsync_page(kvm,
sp);  	if (!sp->root_count) { -		hlist_del(&sp->hash_link); -		kvm_mmu_free_page(kvm, sp); +		/* Count self */ +		ret++; +		list_move(&sp->link, invalid_list);  	} else { -		sp->role.invalid = 1;  		list_move(&sp->link, &kvm->arch.active_mmu_pages);  		kvm_reload_remote_mmus(kvm);  	} + +	sp->role.invalid = 1;  	kvm_mmu_reset_last_pte_updated(kvm);  	return ret;  } +static void kvm_mmu_commit_zap_page(struct kvm *kvm, +				    struct list_head *invalid_list) +{ +	struct kvm_mmu_page *sp; + +	if (list_empty(invalid_list)) +		return; + +	kvm_flush_remote_tlbs(kvm); + +	do { +		sp = list_first_entry(invalid_list, struct kvm_mmu_page, link); +		WARN_ON(!sp->role.invalid || sp->root_count); +		kvm_mmu_free_page(kvm, sp); +	} while (!list_empty(invalid_list)); + +} +  /*   * Changing the number of mmu pages allocated to the vm   * Note: if kvm_nr_mmu_pages is too small, you will get dead lock @@ -1521,6 +1670,7 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)  void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)  {  	int used_pages; +	LIST_HEAD(invalid_list);  	used_pages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages;  	used_pages = max(0, used_pages); @@ -1538,9 +1688,10 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)  			page = container_of(kvm->arch.active_mmu_pages.prev,  					    struct kvm_mmu_page, link); -			used_pages -= kvm_mmu_zap_page(kvm, page); -			used_pages--; +			used_pages -= kvm_mmu_prepare_zap_page(kvm, page, +							       &invalid_list);  		} +		kvm_mmu_commit_zap_page(kvm, &invalid_list);  		kvm_nr_mmu_pages = used_pages;  		kvm->arch.n_free_mmu_pages = 0;  	} @@ -1553,47 +1704,36 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)  static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)  { -	unsigned index; -	struct hlist_head *bucket;  	struct kvm_mmu_page *sp; -	struct hlist_node *node, *n; +	struct hlist_node *node; +	LIST_HEAD(invalid_list);  	int r;  	pgprintk("%s: looking for gfn %lx\n", __func__, gfn);  	r = 0; -	index = kvm_page_table_hashfn(gfn); -	bucket = &kvm->arch.mmu_page_hash[index]; -restart: -	hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) -		if (sp->gfn == gfn && !sp->role.direct) { -			pgprintk("%s: gfn %lx role %x\n", __func__, gfn, -				 sp->role.word); -			r = 1; -			if (kvm_mmu_zap_page(kvm, sp)) -				goto restart; -		} + +	for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { +		pgprintk("%s: gfn %lx role %x\n", __func__, gfn, +			 sp->role.word); +		r = 1; +		kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); +	} +	kvm_mmu_commit_zap_page(kvm, &invalid_list);  	return r;  }  static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)  { -	unsigned index; -	struct hlist_head *bucket;  	struct kvm_mmu_page *sp; -	struct hlist_node *node, *nn; +	struct hlist_node *node; +	LIST_HEAD(invalid_list); -	index = kvm_page_table_hashfn(gfn); -	bucket = &kvm->arch.mmu_page_hash[index]; -restart: -	hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) { -		if (sp->gfn == gfn && !sp->role.direct -		    && !sp->role.invalid) { -			pgprintk("%s: zap %lx %x\n", -				 __func__, gfn, sp->role.word); -			if (kvm_mmu_zap_page(kvm, sp)) -				goto restart; -		} +	for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { +		pgprintk("%s: zap %lx %x\n", +			 __func__, gfn, sp->role.word); +		kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);  	} +	kvm_mmu_commit_zap_page(kvm, &invalid_list);  }  static void page_header_update_slot(struct kvm 
*kvm, void *pte, gfn_t gfn) @@ -1723,47 +1863,51 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)  }  EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type); -static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) +static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)  { -	unsigned index; -	struct hlist_head *bucket; -	struct kvm_mmu_page *s; -	struct hlist_node *node, *n; - -	index = kvm_page_table_hashfn(sp->gfn); -	bucket = &vcpu->kvm->arch.mmu_page_hash[index]; -	/* don't unsync if pagetable is shadowed with multiple roles */ -	hlist_for_each_entry_safe(s, node, n, bucket, hash_link) { -		if (s->gfn != sp->gfn || s->role.direct) -			continue; -		if (s->role.word != sp->role.word) -			return 1; -	}  	trace_kvm_mmu_unsync_page(sp);  	++vcpu->kvm->stat.mmu_unsync;  	sp->unsync = 1;  	kvm_mmu_mark_parents_unsync(sp); -  	mmu_convert_notrap(sp); -	return 0; +} + +static void kvm_unsync_pages(struct kvm_vcpu *vcpu,  gfn_t gfn) +{ +	struct kvm_mmu_page *s; +	struct hlist_node *node; + +	for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { +		if (s->unsync) +			continue; +		WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); +		__kvm_unsync_page(vcpu, s); +	}  }  static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,  				  bool can_unsync)  { -	struct kvm_mmu_page *shadow; +	struct kvm_mmu_page *s; +	struct hlist_node *node; +	bool need_unsync = false; -	shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn); -	if (shadow) { -		if (shadow->role.level != PT_PAGE_TABLE_LEVEL) +	for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { +		if (!can_unsync)  			return 1; -		if (shadow->unsync) -			return 0; -		if (can_unsync && oos_shadow) -			return kvm_unsync_page(vcpu, shadow); -		return 1; + +		if (s->role.level != PT_PAGE_TABLE_LEVEL) +			return 1; + +		if (!need_unsync && !s->unsync) { +			if (!oos_shadow) +				return 1; +			need_unsync = true; +		}  	} +	if (need_unsync) +		kvm_unsync_pages(vcpu, gfn);  	return 0;  } @@ -1804,13 +1948,14 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,  	spte |= (u64)pfn << PAGE_SHIFT;  	if ((pte_access & ACC_WRITE_MASK) -	    || (write_fault && !is_write_protection(vcpu) && !user_fault)) { +	    || (!tdp_enabled && write_fault && !is_write_protection(vcpu) +		&& !user_fault)) {  		if (level > PT_PAGE_TABLE_LEVEL &&  		    has_wrprotected_page(vcpu->kvm, gfn, level)) {  			ret = 1; -			spte = shadow_trap_nonpresent_pte; -			goto set_pte; +			drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte); +			goto done;  		}  		spte |= PT_WRITABLE_MASK; @@ -1841,7 +1986,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,  		mark_page_dirty(vcpu->kvm, gfn);  set_pte: -	__set_spte(sptep, spte); +	if (is_writable_pte(*sptep) && !is_writable_pte(spte)) +		kvm_set_pfn_dirty(pfn); +	update_spte(sptep, spte); +done:  	return ret;  } @@ -1853,7 +2001,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,  			 bool reset_host_protection)  {  	int was_rmapped = 0; -	int was_writable = is_writable_pte(*sptep);  	int rmap_count;  	pgprintk("%s: spte %llx access %x write_fault %d" @@ -1878,8 +2025,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,  		} else if (pfn != spte_to_pfn(*sptep)) {  			pgprintk("hfn old %lx new %lx\n",  				 spte_to_pfn(*sptep), pfn); -			rmap_remove(vcpu->kvm, sptep); -			__set_spte(sptep, shadow_trap_nonpresent_pte); +			drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte);  			kvm_flush_remote_tlbs(vcpu->kvm);  		} else  			was_rmapped = 1; @@ -1890,7 
+2036,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,  		      reset_host_protection)) {  		if (write_fault)  			*ptwrite = 1; -		kvm_x86_ops->tlb_flush(vcpu); +		kvm_mmu_flush_tlb(vcpu);  	}  	pgprintk("%s: setting spte %llx\n", __func__, *sptep); @@ -1904,15 +2050,10 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,  	page_header_update_slot(vcpu->kvm, sptep, gfn);  	if (!was_rmapped) {  		rmap_count = rmap_add(vcpu, sptep, gfn); -		kvm_release_pfn_clean(pfn);  		if (rmap_count > RMAP_RECYCLE_THRESHOLD)  			rmap_recycle(vcpu, sptep, gfn); -	} else { -		if (was_writable) -			kvm_release_pfn_dirty(pfn); -		else -			kvm_release_pfn_clean(pfn);  	} +	kvm_release_pfn_clean(pfn);  	if (speculative) {  		vcpu->arch.last_pte_updated = sptep;  		vcpu->arch.last_pte_gfn = gfn; @@ -1941,7 +2082,10 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,  		}  		if (*iterator.sptep == shadow_trap_nonpresent_pte) { -			pseudo_gfn = (iterator.addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT; +			u64 base_addr = iterator.addr; + +			base_addr &= PT64_LVL_ADDR_MASK(iterator.level); +			pseudo_gfn = base_addr >> PAGE_SHIFT;  			sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,  					      iterator.level - 1,  					      1, ACC_ALL, iterator.sptep); @@ -1960,6 +2104,29 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,  	return pt_write;  } +static void kvm_send_hwpoison_signal(struct kvm *kvm, gfn_t gfn) +{ +	char buf[1]; +	void __user *hva; +	int r; + +	/* Touch the page, so send SIGBUS */ +	hva = (void __user *)gfn_to_hva(kvm, gfn); +	r = copy_from_user(buf, hva, 1); +} + +static int kvm_handle_bad_page(struct kvm *kvm, gfn_t gfn, pfn_t pfn) +{ +	kvm_release_pfn_clean(pfn); +	if (is_hwpoison_pfn(pfn)) { +		kvm_send_hwpoison_signal(kvm, gfn); +		return 0; +	} else if (is_fault_pfn(pfn)) +		return -EFAULT; + +	return 1; +} +  static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)  {  	int r; @@ -1983,10 +2150,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)  	pfn = gfn_to_pfn(vcpu->kvm, gfn);  	/* mmio */ -	if (is_error_pfn(pfn)) { -		kvm_release_pfn_clean(pfn); -		return 1; -	} +	if (is_error_pfn(pfn)) +		return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);  	spin_lock(&vcpu->kvm->mmu_lock);  	if (mmu_notifier_retry(vcpu, mmu_seq)) @@ -2009,6 +2174,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)  {  	int i;  	struct kvm_mmu_page *sp; +	LIST_HEAD(invalid_list);  	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))  		return; @@ -2018,8 +2184,10 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)  		sp = page_header(root);  		--sp->root_count; -		if (!sp->root_count && sp->role.invalid) -			kvm_mmu_zap_page(vcpu->kvm, sp); +		if (!sp->root_count && sp->role.invalid) { +			kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); +			kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); +		}  		vcpu->arch.mmu.root_hpa = INVALID_PAGE;  		spin_unlock(&vcpu->kvm->mmu_lock);  		return; @@ -2032,10 +2200,12 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)  			sp = page_header(root);  			--sp->root_count;  			if (!sp->root_count && sp->role.invalid) -				kvm_mmu_zap_page(vcpu->kvm, sp); +				kvm_mmu_prepare_zap_page(vcpu->kvm, sp, +							 &invalid_list);  		}  		vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;  	} +	kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);  	spin_unlock(&vcpu->kvm->mmu_lock);  	vcpu->arch.mmu.root_hpa = INVALID_PAGE;  } @@ -2045,7 +2215,7 @@ static int mmu_check_root(struct 
kvm_vcpu *vcpu, gfn_t root_gfn)  	int ret = 0;  	if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) { -		set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); +		kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);  		ret = 1;  	} @@ -2073,6 +2243,7 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu)  			root_gfn = 0;  		}  		spin_lock(&vcpu->kvm->mmu_lock); +		kvm_mmu_free_some_pages(vcpu);  		sp = kvm_mmu_get_page(vcpu, root_gfn, 0,  				      PT64_ROOT_LEVEL, direct,  				      ACC_ALL, NULL); @@ -2103,6 +2274,7 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu)  			root_gfn = i << 30;  		}  		spin_lock(&vcpu->kvm->mmu_lock); +		kvm_mmu_free_some_pages(vcpu);  		sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,  				      PT32_ROOT_LEVEL, direct,  				      ACC_ALL, NULL); @@ -2198,10 +2370,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,  	mmu_seq = vcpu->kvm->mmu_notifier_seq;  	smp_rmb();  	pfn = gfn_to_pfn(vcpu->kvm, gfn); -	if (is_error_pfn(pfn)) { -		kvm_release_pfn_clean(pfn); -		return 1; -	} +	if (is_error_pfn(pfn)) +		return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);  	spin_lock(&vcpu->kvm->mmu_lock);  	if (mmu_notifier_retry(vcpu, mmu_seq))  		goto out_unlock; @@ -2243,7 +2413,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu)  void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)  {  	++vcpu->stat.tlb_flush; -	kvm_x86_ops->tlb_flush(vcpu); +	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);  }  static void paging_new_cr3(struct kvm_vcpu *vcpu) @@ -2457,10 +2627,9 @@ static int init_kvm_mmu(struct kvm_vcpu *vcpu)  static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)  {  	ASSERT(vcpu); -	if (VALID_PAGE(vcpu->arch.mmu.root_hpa)) { +	if (VALID_PAGE(vcpu->arch.mmu.root_hpa)) +		/* mmu.free() should set root_hpa = INVALID_PAGE */  		vcpu->arch.mmu.free(vcpu); -		vcpu->arch.mmu.root_hpa = INVALID_PAGE; -	}  }  int kvm_mmu_reset_context(struct kvm_vcpu *vcpu) @@ -2477,9 +2646,6 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)  	r = mmu_topup_memory_caches(vcpu);  	if (r)  		goto out; -	spin_lock(&vcpu->kvm->mmu_lock); -	kvm_mmu_free_some_pages(vcpu); -	spin_unlock(&vcpu->kvm->mmu_lock);  	r = mmu_alloc_roots(vcpu);  	spin_lock(&vcpu->kvm->mmu_lock);  	mmu_sync_roots(vcpu); @@ -2508,7 +2674,7 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,  	pte = *spte;  	if (is_shadow_present_pte(pte)) {  		if (is_last_spte(pte, sp->role.level)) -			rmap_remove(vcpu->kvm, spte); +			drop_spte(vcpu->kvm, spte, shadow_trap_nonpresent_pte);  		else {  			child = page_header(pte & PT64_BASE_ADDR_MASK);  			mmu_page_remove_parent_pte(child, spte); @@ -2529,6 +2695,9 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,  		return;          } +	if (is_rsvd_bits_set(vcpu, *(u64 *)new, PT_PAGE_TABLE_LEVEL)) +		return; +  	++vcpu->kvm->stat.mmu_pte_updated;  	if (!sp->role.cr4_pae)  		paging32_update_pte(vcpu, sp, spte, new); @@ -2549,11 +2718,15 @@ static bool need_remote_flush(u64 old, u64 new)  	return (old & ~new & PT64_PERM_MASK) != 0;  } -static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, u64 old, u64 new) +static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page, +				    bool remote_flush, bool local_flush)  { -	if (need_remote_flush(old, new)) +	if (zap_page) +		return; + +	if (remote_flush)  		kvm_flush_remote_tlbs(vcpu->kvm); -	else +	else if (local_flush)  		kvm_mmu_flush_tlb(vcpu);  } @@ -2603,10 +2776,10 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,  		       bool guest_initiated)  {  	gfn_t gfn = gpa >> PAGE_SHIFT; +	union kvm_mmu_page_role mask = { 
.word = 0 };  	struct kvm_mmu_page *sp; -	struct hlist_node *node, *n; -	struct hlist_head *bucket; -	unsigned index; +	struct hlist_node *node; +	LIST_HEAD(invalid_list);  	u64 entry, gentry;  	u64 *spte;  	unsigned offset = offset_in_page(gpa); @@ -2619,6 +2792,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,  	int npte;  	int r;  	int invlpg_counter; +	bool remote_flush, local_flush, zap_page; + +	zap_page = remote_flush = local_flush = false;  	pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); @@ -2674,13 +2850,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,  			vcpu->arch.last_pte_updated = NULL;  		}  	} -	index = kvm_page_table_hashfn(gfn); -	bucket = &vcpu->kvm->arch.mmu_page_hash[index]; -restart: -	hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { -		if (sp->gfn != gfn || sp->role.direct || sp->role.invalid) -			continue; +	mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; +	for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) {  		pte_size = sp->role.cr4_pae ? 8 : 4;  		misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);  		misaligned |= bytes < 4; @@ -2697,8 +2869,8 @@ restart:  			 */  			pgprintk("misaligned: gpa %llx bytes %d role %x\n",  				 gpa, bytes, sp->role.word); -			if (kvm_mmu_zap_page(vcpu->kvm, sp)) -				goto restart; +			zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, +						     &invalid_list);  			++vcpu->kvm->stat.mmu_flooded;  			continue;  		} @@ -2722,16 +2894,22 @@ restart:  			if (quadrant != sp->role.quadrant)  				continue;  		} +		local_flush = true;  		spte = &sp->spt[page_offset / sizeof(*spte)];  		while (npte--) {  			entry = *spte;  			mmu_pte_write_zap_pte(vcpu, sp, spte); -			if (gentry) +			if (gentry && +			      !((sp->role.word ^ vcpu->arch.mmu.base_role.word) +			      & mask.word))  				mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); -			mmu_pte_write_flush_tlb(vcpu, entry, *spte); +			if (!remote_flush && need_remote_flush(entry, *spte)) +				remote_flush = true;  			++spte;  		}  	} +	mmu_pte_write_flush_tlb(vcpu, zap_page, remote_flush, local_flush); +	kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);  	kvm_mmu_audit(vcpu, "post pte write");  	spin_unlock(&vcpu->kvm->mmu_lock);  	if (!is_error_pfn(vcpu->arch.update_pte.pfn)) { @@ -2759,15 +2937,21 @@ EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt);  void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)  { -	while (vcpu->kvm->arch.n_free_mmu_pages < KVM_REFILL_PAGES && +	int free_pages; +	LIST_HEAD(invalid_list); + +	free_pages = vcpu->kvm->arch.n_free_mmu_pages; +	while (free_pages < KVM_REFILL_PAGES &&  	       !list_empty(&vcpu->kvm->arch.active_mmu_pages)) {  		struct kvm_mmu_page *sp;  		sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev,  				  struct kvm_mmu_page, link); -		kvm_mmu_zap_page(vcpu->kvm, sp); +		free_pages += kvm_mmu_prepare_zap_page(vcpu->kvm, sp, +						       &invalid_list);  		++vcpu->kvm->stat.mmu_recycled;  	} +	kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);  }  int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) @@ -2795,11 +2979,8 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)  		return 1;  	case EMULATE_DO_MMIO:  		++vcpu->stat.mmio_exits; -		return 0; +		/* fall through */  	case EMULATE_FAIL: -		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; -		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; -		vcpu->run->internal.ndata = 0;  		return 0;  	default:  		BUG(); @@ -2896,7 +3077,7 @@ void 
kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)  		pt = sp->spt;  		for (i = 0; i < PT64_ENT_PER_PAGE; ++i)  			/* avoid RMW */ -			if (pt[i] & PT_WRITABLE_MASK) +			if (is_writable_pte(pt[i]))  				pt[i] &= ~PT_WRITABLE_MASK;  	}  	kvm_flush_remote_tlbs(kvm); @@ -2905,25 +3086,26 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)  void kvm_mmu_zap_all(struct kvm *kvm)  {  	struct kvm_mmu_page *sp, *node; +	LIST_HEAD(invalid_list);  	spin_lock(&kvm->mmu_lock);  restart:  	list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) -		if (kvm_mmu_zap_page(kvm, sp)) +		if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))  			goto restart; +	kvm_mmu_commit_zap_page(kvm, &invalid_list);  	spin_unlock(&kvm->mmu_lock); - -	kvm_flush_remote_tlbs(kvm);  } -static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm) +static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm, +					       struct list_head *invalid_list)  {  	struct kvm_mmu_page *page;  	page = container_of(kvm->arch.active_mmu_pages.prev,  			    struct kvm_mmu_page, link); -	return kvm_mmu_zap_page(kvm, page) + 1; +	return kvm_mmu_prepare_zap_page(kvm, page, invalid_list);  }  static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) @@ -2936,6 +3118,7 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)  	list_for_each_entry(kvm, &vm_list, vm_list) {  		int npages, idx, freed_pages; +		LIST_HEAD(invalid_list);  		idx = srcu_read_lock(&kvm->srcu);  		spin_lock(&kvm->mmu_lock); @@ -2943,12 +3126,14 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)  			 kvm->arch.n_free_mmu_pages;  		cache_count += npages;  		if (!kvm_freed && nr_to_scan > 0 && npages > 0) { -			freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm); +			freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm, +							  &invalid_list);  			cache_count -= freed_pages;  			kvm_freed = kvm;  		}  		nr_to_scan--; +		kvm_mmu_commit_zap_page(kvm, &invalid_list);  		spin_unlock(&kvm->mmu_lock);  		srcu_read_unlock(&kvm->srcu, idx);  	} @@ -3074,7 +3259,7 @@ static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu,  static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu)  { -	kvm_set_cr3(vcpu, vcpu->arch.cr3); +	(void)kvm_set_cr3(vcpu, vcpu->arch.cr3);  	return 1;  } @@ -3331,9 +3516,9 @@ void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)  	struct kvm_mmu_page *rev_sp;  	gfn_t gfn; -	if (*sptep & PT_WRITABLE_MASK) { +	if (is_writable_pte(*sptep)) {  		rev_sp = page_header(__pa(sptep)); -		gfn = rev_sp->gfns[sptep - rev_sp->spt]; +		gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt);  		if (!gfn_to_memslot(kvm, gfn)) {  			if (!printk_ratelimit()) @@ -3347,8 +3532,7 @@ void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)  			return;  		} -		rmapp = gfn_to_rmap(kvm, rev_sp->gfns[sptep - rev_sp->spt], -				    rev_sp->role.level); +		rmapp = gfn_to_rmap(kvm, gfn, rev_sp->role.level);  		if (!*rmapp) {  			if (!printk_ratelimit())  				return; @@ -3381,7 +3565,7 @@ static void check_writable_mappings_rmap(struct kvm_vcpu *vcpu)  			if (!(ent & PT_PRESENT_MASK))  				continue; -			if (!(ent & PT_WRITABLE_MASK)) +			if (!is_writable_pte(ent))  				continue;  			inspect_spte_has_rmap(vcpu->kvm, &pt[i]);  		} @@ -3409,13 +3593,12 @@ static void audit_write_protection(struct kvm_vcpu *vcpu)  		if (sp->unsync)  			continue; -		gfn = unalias_gfn(vcpu->kvm, sp->gfn); -		slot = gfn_to_memslot_unaliased(vcpu->kvm, sp->gfn); +		slot = 
gfn_to_memslot(vcpu->kvm, sp->gfn);  		rmapp = &slot->rmap[gfn - slot->base_gfn];  		spte = rmap_next(vcpu->kvm, rmapp, NULL);  		while (spte) { -			if (*spte & PT_WRITABLE_MASK) +			if (is_writable_pte(*spte))  				printk(KERN_ERR "%s: (%s) shadow page has "  				"writable mappings: gfn %lx role %x\n",  			       __func__, audit_msg, sp->gfn, diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 42f07b1bfbc9..3aab0f0930ef 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -190,7 +190,7 @@ DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_unsync_page,  	TP_ARGS(sp) ); -DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_zap_page, +DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_prepare_zap_page,  	TP_PROTO(struct kvm_mmu_page *sp),  	TP_ARGS(sp) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 2331bdc2b549..51ef9097960d 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -7,6 +7,7 @@   * MMU support   *   * Copyright (C) 2006 Qumranet, Inc. + * Copyright 2010 Red Hat, Inc. and/or its affiliates.   *   * Authors:   *   Yaniv Kamay  <yaniv@qumranet.com> @@ -118,21 +119,25 @@ static int FNAME(walk_addr)(struct guest_walker *walker,  {  	pt_element_t pte;  	gfn_t table_gfn; -	unsigned index, pt_access, pte_access; +	unsigned index, pt_access, uninitialized_var(pte_access);  	gpa_t pte_gpa; -	int rsvd_fault = 0; +	bool eperm, present, rsvd_fault;  	trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault,  				     fetch_fault);  walk: +	present = true; +	eperm = rsvd_fault = false;  	walker->level = vcpu->arch.mmu.root_level;  	pte = vcpu->arch.cr3;  #if PTTYPE == 64  	if (!is_long_mode(vcpu)) {  		pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3);  		trace_kvm_mmu_paging_element(pte, walker->level); -		if (!is_present_gpte(pte)) -			goto not_present; +		if (!is_present_gpte(pte)) { +			present = false; +			goto error; +		}  		--walker->level;  	}  #endif @@ -150,37 +155,42 @@ walk:  		walker->table_gfn[walker->level - 1] = table_gfn;  		walker->pte_gpa[walker->level - 1] = pte_gpa; -		if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte))) -			goto not_present; +		if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte))) { +			present = false; +			break; +		}  		trace_kvm_mmu_paging_element(pte, walker->level); -		if (!is_present_gpte(pte)) -			goto not_present; +		if (!is_present_gpte(pte)) { +			present = false; +			break; +		} -		rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level); -		if (rsvd_fault) -			goto access_error; +		if (is_rsvd_bits_set(vcpu, pte, walker->level)) { +			rsvd_fault = true; +			break; +		}  		if (write_fault && !is_writable_pte(pte))  			if (user_fault || is_write_protection(vcpu)) -				goto access_error; +				eperm = true;  		if (user_fault && !(pte & PT_USER_MASK)) -			goto access_error; +			eperm = true;  #if PTTYPE == 64  		if (fetch_fault && (pte & PT64_NX_MASK)) -			goto access_error; +			eperm = true;  #endif -		if (!(pte & PT_ACCESSED_MASK)) { +		if (!eperm && !rsvd_fault && !(pte & PT_ACCESSED_MASK)) {  			trace_kvm_mmu_set_accessed_bit(table_gfn, index,  						       sizeof(pte)); -			mark_page_dirty(vcpu->kvm, table_gfn);  			if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn,  			    index, pte, pte|PT_ACCESSED_MASK))  				goto walk; +			mark_page_dirty(vcpu->kvm, table_gfn);  			pte |= PT_ACCESSED_MASK;  		} @@ -213,15 +223,18 @@ walk:  		--walker->level;  	} +	if (!present || eperm || rsvd_fault) +		goto error; +  	if (write_fault && !is_dirty_gpte(pte)) {  		bool ret;  		
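		/*
		 * As in the accessed-bit path above, mark_page_dirty() must
		 * follow a successful cmpxchg_gpte(): if the cmpxchg loses a
		 * race the walk is simply retried, and the page-table page
		 * was never written, so it must not be marked dirty yet.
		 */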
trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); -		mark_page_dirty(vcpu->kvm, table_gfn);  		ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte,  			    pte|PT_DIRTY_MASK);  		if (ret)  			goto walk; +		mark_page_dirty(vcpu->kvm, table_gfn);  		pte |= PT_DIRTY_MASK;  		walker->ptes[walker->level - 1] = pte;  	} @@ -229,22 +242,18 @@ walk:  	walker->pt_access = pt_access;  	walker->pte_access = pte_access;  	pgprintk("%s: pte %llx pte_access %x pt_access %x\n", -		 __func__, (u64)pte, pt_access, pte_access); +		 __func__, (u64)pte, pte_access, pt_access);  	return 1; -not_present: +error:  	walker->error_code = 0; -	goto err; - -access_error: -	walker->error_code = PFERR_PRESENT_MASK; - -err: +	if (present) +		walker->error_code |= PFERR_PRESENT_MASK;  	if (write_fault)  		walker->error_code |= PFERR_WRITE_MASK;  	if (user_fault)  		walker->error_code |= PFERR_USER_MASK; -	if (fetch_fault) +	if (fetch_fault && is_nx(vcpu))  		walker->error_code |= PFERR_FETCH_MASK;  	if (rsvd_fault)  		walker->error_code |= PFERR_RSVD_MASK; @@ -252,7 +261,7 @@ err:  	return 0;  } -static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, +static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,  			      u64 *spte, const void *pte)  {  	pt_element_t gpte; @@ -263,7 +272,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,  	gpte = *(const pt_element_t *)pte;  	if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {  		if (!is_present_gpte(gpte)) { -			if (page->unsync) +			if (sp->unsync)  				new_spte = shadow_trap_nonpresent_pte;  			else  				new_spte = shadow_notrap_nonpresent_pte; @@ -272,7 +281,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,  		return;  	}  	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); -	pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte); +	pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);  	if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn)  		return;  	pfn = vcpu->arch.update_pte.pfn; @@ -285,11 +294,22 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,  	 * we call mmu_set_spte() with reset_host_protection = true because that  	 * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1).  	 */ -	mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, -		     gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL, +	mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, +		     is_dirty_gpte(gpte), NULL, PT_PAGE_TABLE_LEVEL,  		     gpte_to_gfn(gpte), pfn, true, true);  } +static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu, +				struct guest_walker *gw, int level) +{ +	int r; +	pt_element_t curr_pte; + +	r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 1], +				  &curr_pte, sizeof(curr_pte)); +	return r || curr_pte != gw->ptes[level - 1]; +} +  /*   * Fetch a shadow pte for a specific level in the paging hierarchy.   
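 * The walk proceeds in two phases: the indirect levels, which shadow
 * guest page-table pages and re-check each guest pte (via
 * FNAME(gpte_changed)) once its page has been write protected, and then
 * the direct levels, which merely map the final gfn; any guest pte that
 * changed under us aborts the fetch.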
*/ @@ -299,75 +319,86 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,  			 int *ptwrite, pfn_t pfn)  {  	unsigned access = gw->pt_access; -	struct kvm_mmu_page *shadow_page; -	u64 spte, *sptep = NULL; -	int direct; -	gfn_t table_gfn; -	int r; -	int level; -	pt_element_t curr_pte; -	struct kvm_shadow_walk_iterator iterator; +	struct kvm_mmu_page *sp = NULL; +	bool dirty = is_dirty_gpte(gw->ptes[gw->level - 1]); +	int top_level; +	unsigned direct_access; +	struct kvm_shadow_walk_iterator it;  	if (!is_present_gpte(gw->ptes[gw->level - 1]))  		return NULL; -	for_each_shadow_entry(vcpu, addr, iterator) { -		level = iterator.level; -		sptep = iterator.sptep; -		if (iterator.level == hlevel) { -			mmu_set_spte(vcpu, sptep, access, -				     gw->pte_access & access, -				     user_fault, write_fault, -				     gw->ptes[gw->level-1] & PT_DIRTY_MASK, -				     ptwrite, level, -				     gw->gfn, pfn, false, true); -			break; -		} +	direct_access = gw->pt_access & gw->pte_access; +	if (!dirty) +		direct_access &= ~ACC_WRITE_MASK; -		if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) -			continue; +	top_level = vcpu->arch.mmu.root_level; +	if (top_level == PT32E_ROOT_LEVEL) +		top_level = PT32_ROOT_LEVEL; +	/* +	 * Verify that the top-level gpte is still there.  Since the page +	 * is a root page, it is either write protected (and cannot be +	 * changed from now on) or it is invalid (in which case, we don't +	 * really care if it changes underneath us after this point). +	 */ +	if (FNAME(gpte_changed)(vcpu, gw, top_level)) +		goto out_gpte_changed; -		if (is_large_pte(*sptep)) { -			rmap_remove(vcpu->kvm, sptep); -			__set_spte(sptep, shadow_trap_nonpresent_pte); -			kvm_flush_remote_tlbs(vcpu->kvm); -		} +	for (shadow_walk_init(&it, vcpu, addr); +	     shadow_walk_okay(&it) && it.level > gw->level; +	     shadow_walk_next(&it)) { +		gfn_t table_gfn; -		if (level <= gw->level) { -			int delta = level - gw->level + 1; -			direct = 1; -			if (!is_dirty_gpte(gw->ptes[level - delta])) -				access &= ~ACC_WRITE_MASK; -			table_gfn = gpte_to_gfn(gw->ptes[level - delta]); -			/* advance table_gfn when emulating 1gb pages with 4k */ -			if (delta == 0) -				table_gfn += PT_INDEX(addr, level); -			access &= gw->pte_access; -		} else { -			direct = 0; -			table_gfn = gw->table_gfn[level - 2]; -		} -		shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, -					       direct, access, sptep); -		if (!direct) { -			r = kvm_read_guest_atomic(vcpu->kvm, -						  gw->pte_gpa[level - 2], -						  &curr_pte, sizeof(curr_pte)); -			if (r || curr_pte != gw->ptes[level - 2]) { -				kvm_mmu_put_page(shadow_page, sptep); -				kvm_release_pfn_clean(pfn); -				sptep = NULL; -				break; -			} +		drop_large_spte(vcpu, it.sptep); + +		sp = NULL; +		if (!is_shadow_present_pte(*it.sptep)) { +			table_gfn = gw->table_gfn[it.level - 2]; +			sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1, +					      false, access, it.sptep);  		} -		spte = __pa(shadow_page->spt) -			| PT_PRESENT_MASK | PT_ACCESSED_MASK -			| PT_WRITABLE_MASK | PT_USER_MASK; -		*sptep = spte; +		/* +		 * Verify that the gpte in the page we've just write +		 * protected is still there. 
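+		 * The guest may have modified the gpte between the earlier
+		 * read in walk_addr() and the write protection taking
+		 * effect, so re-read it and compare before trusting the
+		 * cached value.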
+		 */ +		if (FNAME(gpte_changed)(vcpu, gw, it.level - 1)) +			goto out_gpte_changed; + +		if (sp) +			link_shadow_page(it.sptep, sp);  	} -	return sptep; +	for (; +	     shadow_walk_okay(&it) && it.level > hlevel; +	     shadow_walk_next(&it)) { +		gfn_t direct_gfn; + +		validate_direct_spte(vcpu, it.sptep, direct_access); + +		drop_large_spte(vcpu, it.sptep); + +		if (is_shadow_present_pte(*it.sptep)) +			continue; + +		direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); + +		sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1, +				      true, direct_access, it.sptep); +		link_shadow_page(it.sptep, sp); +	} + +	mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access, +		     user_fault, write_fault, dirty, ptwrite, it.level, +		     gw->gfn, pfn, false, true); + +	return it.sptep; + +out_gpte_changed: +	if (sp) +		kvm_mmu_put_page(sp, it.sptep); +	kvm_release_pfn_clean(pfn); +	return NULL;  }  /* @@ -431,11 +462,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,  	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);  	/* mmio */ -	if (is_error_pfn(pfn)) { -		pgprintk("gfn %lx is mmio\n", walker.gfn); -		kvm_release_pfn_clean(pfn); -		return 1; -	} +	if (is_error_pfn(pfn)) +		return kvm_handle_bad_page(vcpu->kvm, walker.gfn, pfn);  	spin_lock(&vcpu->kvm->mmu_lock);  	if (mmu_notifier_retry(vcpu, mmu_seq)) @@ -443,6 +471,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,  	kvm_mmu_free_some_pages(vcpu);  	sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,  			     level, &write_pt, pfn); +	(void)sptep;  	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,  		 sptep, *sptep, write_pt); @@ -464,6 +493,7 @@ out_unlock:  static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)  {  	struct kvm_shadow_walk_iterator iterator; +	struct kvm_mmu_page *sp;  	gpa_t pte_gpa = -1;  	int level;  	u64 *sptep; @@ -475,10 +505,13 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)  		level = iterator.level;  		sptep = iterator.sptep; +		sp = page_header(__pa(sptep));  		if (is_last_spte(*sptep, level)) { -			struct kvm_mmu_page *sp = page_header(__pa(sptep));  			int offset, shift; +			if (!sp->unsync) +				break; +  			shift = PAGE_SHIFT -  				  (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level;  			offset = sp->role.quadrant << shift; @@ -487,16 +520,17 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)  			pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);  			if (is_shadow_present_pte(*sptep)) { -				rmap_remove(vcpu->kvm, sptep);  				if (is_large_pte(*sptep))  					--vcpu->kvm->stat.lpages; +				drop_spte(vcpu->kvm, sptep, +					  shadow_trap_nonpresent_pte);  				need_flush = 1; -			} -			__set_spte(sptep, shadow_trap_nonpresent_pte); +			} else +				__set_spte(sptep, shadow_trap_nonpresent_pte);  			break;  		} -		if (!is_shadow_present_pte(*sptep)) +		if (!is_shadow_present_pte(*sptep) || !sp->unsync_children)  			break;  	} @@ -570,9 +604,9 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,   * Using the cached information from sp->gfns is safe because:   * - The spte has a reference to the struct page, so the pfn for a given gfn   *   can't change unless all sptes pointing to it are nuked first. - * - Alias changes zap the entire shadow cache.   
*/ -static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) +static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, +			    bool clear_unsync)  {  	int i, offset, nr_present;  	bool reset_host_protection; @@ -580,6 +614,9 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)  	offset = nr_present = 0; +	/* direct kvm_mmu_page can not be unsync. */ +	BUG_ON(sp->role.direct); +  	if (PTTYPE == 32)  		offset = sp->role.quadrant << PT64_LEVEL_BITS; @@ -589,7 +626,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)  		unsigned pte_access;  		pt_element_t gpte;  		gpa_t pte_gpa; -		gfn_t gfn = sp->gfns[i]; +		gfn_t gfn;  		if (!is_shadow_present_pte(sp->spt[i]))  			continue; @@ -600,16 +637,17 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)  					  sizeof(pt_element_t)))  			return -EINVAL; -		if (gpte_to_gfn(gpte) != gfn || !is_present_gpte(gpte) || -		    !(gpte & PT_ACCESSED_MASK)) { +		gfn = gpte_to_gfn(gpte); +		if (is_rsvd_bits_set(vcpu, gpte, PT_PAGE_TABLE_LEVEL) +		      || gfn != sp->gfns[i] || !is_present_gpte(gpte) +		      || !(gpte & PT_ACCESSED_MASK)) {  			u64 nonpresent; -			rmap_remove(vcpu->kvm, &sp->spt[i]); -			if (is_present_gpte(gpte)) +			if (is_present_gpte(gpte) || !clear_unsync)  				nonpresent = shadow_trap_nonpresent_pte;  			else  				nonpresent = shadow_notrap_nonpresent_pte; -			__set_spte(&sp->spt[i], nonpresent); +			drop_spte(vcpu->kvm, &sp->spt[i], nonpresent);  			continue;  		} diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ce438e0fdd26..bc5b9b8d4a33 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4,6 +4,7 @@   * AMD SVM support   *   * Copyright (C) 2006 Qumranet, Inc. + * Copyright 2010 Red Hat, Inc. and/or its affiliates.   *   * Authors:   *   Yaniv Kamay  <yaniv@qumranet.com> @@ -130,7 +131,7 @@ static struct svm_direct_access_msrs {  	u32 index;   /* Index of the MSR */  	bool always; /* True if intercept is always on */  } direct_access_msrs[] = { -	{ .index = MSR_K6_STAR,				.always = true  }, +	{ .index = MSR_STAR,				.always = true  },  	{ .index = MSR_IA32_SYSENTER_CS,		.always = true  },  #ifdef CONFIG_X86_64  	{ .index = MSR_GS_BASE,				.always = true  }, @@ -285,11 +286,11 @@ static inline void flush_guest_tlb(struct kvm_vcpu *vcpu)  static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)  { +	vcpu->arch.efer = efer;  	if (!npt_enabled && !(efer & EFER_LMA))  		efer &= ~EFER_LME;  	to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; -	vcpu->arch.efer = efer;  }  static int is_external_interrupt(u32 info) @@ -383,8 +384,7 @@ static void svm_init_erratum_383(void)  	int err;  	u64 val; -	/* Only Fam10h is affected */ -	if (boot_cpu_data.x86 != 0x10) +	if (!cpu_has_amd_erratum(amd_erratum_383))  		return;  	/* Use _safe variants to not break nested virtualization */ @@ -640,7 +640,7 @@ static __init int svm_hardware_setup(void)  	if (nested) {  		printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); -		kvm_enable_efer_bits(EFER_SVME); +		kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);  	}  	for_each_possible_cpu(cpu) { @@ -806,7 +806,7 @@ static void init_vmcb(struct vcpu_svm *svm)  	 * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.  	 */  	svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; -	kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0); +	(void)kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0);  	save->cr4 = X86_CR4_PAE;  	/* rdx = ?? 
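	 * (on real hardware, RESET leaves the processor signature in EDX,
	 * which is presumably what the question marks allude to)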
*/ @@ -903,13 +903,18 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)  	svm->asid_generation = 0;  	init_vmcb(svm); -	fx_init(&svm->vcpu); +	err = fx_init(&svm->vcpu); +	if (err) +		goto free_page4; +  	svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;  	if (kvm_vcpu_is_bsp(&svm->vcpu))  		svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;  	return &svm->vcpu; +free_page4: +	__free_page(hsave_page);  free_page3:  	__free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);  free_page2: @@ -1488,7 +1493,7 @@ static void svm_handle_mce(struct vcpu_svm *svm)  		 */  		pr_err("KVM: Guest triggered AMD Erratum 383\n"); -		set_bit(KVM_REQ_TRIPLE_FAULT, &svm->vcpu.requests); +		kvm_make_request(KVM_REQ_TRIPLE_FAULT, &svm->vcpu);  		return;  	} @@ -1535,7 +1540,7 @@ static int io_interception(struct vcpu_svm *svm)  	string = (io_info & SVM_IOIO_STR_MASK) != 0;  	in = (io_info & SVM_IOIO_TYPE_MASK) != 0;  	if (string || in) -		return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO); +		return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE;  	port = io_info >> 16;  	size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; @@ -1957,7 +1962,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)  		svm->vmcb->save.cr3 = hsave->save.cr3;  		svm->vcpu.arch.cr3 = hsave->save.cr3;  	} else { -		kvm_set_cr3(&svm->vcpu, hsave->save.cr3); +		(void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3);  	}  	kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);  	kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp); @@ -2080,7 +2085,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)  		svm->vmcb->save.cr3 = nested_vmcb->save.cr3;  		svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;  	} else -		kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); +		(void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);  	/* Guest paging mode is active - reset mmu */  	kvm_mmu_reset_context(&svm->vcpu); @@ -2386,16 +2391,12 @@ static int iret_interception(struct vcpu_svm *svm)  static int invlpg_interception(struct vcpu_svm *svm)  { -	if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE) -		pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); -	return 1; +	return emulate_instruction(&svm->vcpu, 0, 0, 0) == EMULATE_DONE;  }  static int emulate_on_interception(struct vcpu_svm *svm)  { -	if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE) -		pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); -	return 1; +	return emulate_instruction(&svm->vcpu, 0, 0, 0) == EMULATE_DONE;  }  static int cr8_write_interception(struct vcpu_svm *svm) @@ -2431,7 +2432,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)  		*data = tsc_offset + native_read_tsc();  		break;  	} -	case MSR_K6_STAR: +	case MSR_STAR:  		*data = svm->vmcb->save.star;  		break;  #ifdef CONFIG_X86_64 @@ -2555,7 +2556,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)  		break;  	} -	case MSR_K6_STAR: +	case MSR_STAR:  		svm->vmcb->save.star = data;  		break;  #ifdef CONFIG_X86_64 @@ -2726,6 +2727,99 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {  	[SVM_EXIT_NPF]				= pf_interception,  }; +void dump_vmcb(struct kvm_vcpu *vcpu) +{ +	struct vcpu_svm *svm = to_svm(vcpu); +	struct vmcb_control_area *control = &svm->vmcb->control; +	struct vmcb_save_area *save = &svm->vmcb->save; + +	pr_err("VMCB Control Area:\n"); +	pr_err("cr_read:            %04x\n", control->intercept_cr_read); +	pr_err("cr_write:           %04x\n", control->intercept_cr_write); +	
pr_err("dr_read:            %04x\n", control->intercept_dr_read); +	pr_err("dr_write:           %04x\n", control->intercept_dr_write); +	pr_err("exceptions:         %08x\n", control->intercept_exceptions); +	pr_err("intercepts:         %016llx\n", control->intercept); +	pr_err("pause filter count: %d\n", control->pause_filter_count); +	pr_err("iopm_base_pa:       %016llx\n", control->iopm_base_pa); +	pr_err("msrpm_base_pa:      %016llx\n", control->msrpm_base_pa); +	pr_err("tsc_offset:         %016llx\n", control->tsc_offset); +	pr_err("asid:               %d\n", control->asid); +	pr_err("tlb_ctl:            %d\n", control->tlb_ctl); +	pr_err("int_ctl:            %08x\n", control->int_ctl); +	pr_err("int_vector:         %08x\n", control->int_vector); +	pr_err("int_state:          %08x\n", control->int_state); +	pr_err("exit_code:          %08x\n", control->exit_code); +	pr_err("exit_info1:         %016llx\n", control->exit_info_1); +	pr_err("exit_info2:         %016llx\n", control->exit_info_2); +	pr_err("exit_int_info:      %08x\n", control->exit_int_info); +	pr_err("exit_int_info_err:  %08x\n", control->exit_int_info_err); +	pr_err("nested_ctl:         %lld\n", control->nested_ctl); +	pr_err("nested_cr3:         %016llx\n", control->nested_cr3); +	pr_err("event_inj:          %08x\n", control->event_inj); +	pr_err("event_inj_err:      %08x\n", control->event_inj_err); +	pr_err("lbr_ctl:            %lld\n", control->lbr_ctl); +	pr_err("next_rip:           %016llx\n", control->next_rip); +	pr_err("VMCB State Save Area:\n"); +	pr_err("es:   s: %04x a: %04x l: %08x b: %016llx\n", +		save->es.selector, save->es.attrib, +		save->es.limit, save->es.base); +	pr_err("cs:   s: %04x a: %04x l: %08x b: %016llx\n", +		save->cs.selector, save->cs.attrib, +		save->cs.limit, save->cs.base); +	pr_err("ss:   s: %04x a: %04x l: %08x b: %016llx\n", +		save->ss.selector, save->ss.attrib, +		save->ss.limit, save->ss.base); +	pr_err("ds:   s: %04x a: %04x l: %08x b: %016llx\n", +		save->ds.selector, save->ds.attrib, +		save->ds.limit, save->ds.base); +	pr_err("fs:   s: %04x a: %04x l: %08x b: %016llx\n", +		save->fs.selector, save->fs.attrib, +		save->fs.limit, save->fs.base); +	pr_err("gs:   s: %04x a: %04x l: %08x b: %016llx\n", +		save->gs.selector, save->gs.attrib, +		save->gs.limit, save->gs.base); +	pr_err("gdtr: s: %04x a: %04x l: %08x b: %016llx\n", +		save->gdtr.selector, save->gdtr.attrib, +		save->gdtr.limit, save->gdtr.base); +	pr_err("ldtr: s: %04x a: %04x l: %08x b: %016llx\n", +		save->ldtr.selector, save->ldtr.attrib, +		save->ldtr.limit, save->ldtr.base); +	pr_err("idtr: s: %04x a: %04x l: %08x b: %016llx\n", +		save->idtr.selector, save->idtr.attrib, +		save->idtr.limit, save->idtr.base); +	pr_err("tr:   s: %04x a: %04x l: %08x b: %016llx\n", +		save->tr.selector, save->tr.attrib, +		save->tr.limit, save->tr.base); +	pr_err("cpl:            %d                efer:         %016llx\n", +		save->cpl, save->efer); +	pr_err("cr0:            %016llx cr2:          %016llx\n", +		save->cr0, save->cr2); +	pr_err("cr3:            %016llx cr4:          %016llx\n", +		save->cr3, save->cr4); +	pr_err("dr6:            %016llx dr7:          %016llx\n", +		save->dr6, save->dr7); +	pr_err("rip:            %016llx rflags:       %016llx\n", +		save->rip, save->rflags); +	pr_err("rsp:            %016llx rax:          %016llx\n", +		save->rsp, save->rax); +	pr_err("star:           %016llx lstar:        %016llx\n", +		save->star, save->lstar); +	pr_err("cstar:          %016llx sfmask:       %016llx\n", +		
save->cstar, save->sfmask); +	pr_err("kernel_gs_base: %016llx sysenter_cs:  %016llx\n", +		save->kernel_gs_base, save->sysenter_cs); +	pr_err("sysenter_esp:   %016llx sysenter_eip: %016llx\n", +		save->sysenter_esp, save->sysenter_eip); +	pr_err("gpat:           %016llx dbgctl:       %016llx\n", +		save->g_pat, save->dbgctl); +	pr_err("br_from:        %016llx br_to:        %016llx\n", +		save->br_from, save->br_to); +	pr_err("excp_from:      %016llx excp_to:      %016llx\n", +		save->last_excp_from, save->last_excp_to); + +} +  static int handle_exit(struct kvm_vcpu *vcpu)  {  	struct vcpu_svm *svm = to_svm(vcpu); @@ -2770,6 +2864,8 @@ static int handle_exit(struct kvm_vcpu *vcpu)  		kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;  		kvm_run->fail_entry.hardware_entry_failure_reason  			= svm->vmcb->control.exit_code; +		pr_err("KVM: FAILED VMRUN WITH VMCB:\n"); +		dump_vmcb(vcpu);  		return 0;  	} @@ -2826,9 +2922,6 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)  {  	struct vmcb_control_area *control; -	trace_kvm_inj_virq(irq); - -	++svm->vcpu.stat.irq_injections;  	control = &svm->vmcb->control;  	control->int_vector = irq;  	control->int_ctl &= ~V_INTR_PRIO_MASK; @@ -2842,6 +2935,9 @@ static void svm_set_irq(struct kvm_vcpu *vcpu)  	BUG_ON(!(gif_set(svm))); +	trace_kvm_inj_virq(vcpu->arch.interrupt.nr); +	++vcpu->stat.irq_injections; +  	svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr |  		SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;  } @@ -3327,6 +3423,11 @@ static bool svm_rdtscp_supported(void)  	return false;  } +static bool svm_has_wbinvd_exit(void) +{ +	return true; +} +  static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)  {  	struct vcpu_svm *svm = to_svm(vcpu); @@ -3411,6 +3512,8 @@ static struct kvm_x86_ops svm_x86_ops = {  	.rdtscp_supported = svm_rdtscp_supported,  	.set_supported_cpuid = svm_set_supported_cpuid, + +	.has_wbinvd_exit = svm_has_wbinvd_exit,  };  static int __init svm_init(void) diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c index 4ddadb1a5ffe..e16a0dbe74d8 100644 --- a/arch/x86/kvm/timer.c +++ b/arch/x86/kvm/timer.c @@ -1,3 +1,17 @@ +/* + * Kernel-based Virtual Machine driver for Linux + * + * This module enables machines with Intel VT-x extensions to run virtual + * machines without emulation or binary translation. + * + * timer support + * + * Copyright 2010 Red Hat, Inc. and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2.  See + * the COPYING file in the top-level directory. + */ +  #include <linux/kvm_host.h>  #include <linux/kvm.h>  #include <linux/hrtimer.h> @@ -18,7 +32,7 @@ static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer)  	if (ktimer->reinject || !atomic_read(&ktimer->pending)) {  		atomic_inc(&ktimer->pending);  		/* FIXME: this code should not know anything about vcpus */ -		set_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests); +		kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);  	}  	if (waitqueue_active(q)) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ee03679efe78..49b25eee25ac 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5,6 +5,7 @@   * machines without emulation or binary translation.   *   * Copyright (C) 2006 Qumranet, Inc. + * Copyright 2010 Red Hat, Inc. and/or its affiliates.   
*   * Authors:   *   Avi Kivity   <avi@qumranet.com> @@ -36,6 +37,8 @@  #include <asm/vmx.h>  #include <asm/virtext.h>  #include <asm/mce.h> +#include <asm/i387.h> +#include <asm/xcr.h>  #include "trace.h" @@ -63,6 +66,9 @@ module_param_named(unrestricted_guest,  static int __read_mostly emulate_invalid_guest_state = 0;  module_param(emulate_invalid_guest_state, bool, S_IRUGO); +static int __read_mostly vmm_exclusive = 1; +module_param(vmm_exclusive, bool, S_IRUGO); +  #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST				\  	(X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)  #define KVM_GUEST_CR0_MASK						\ @@ -173,10 +179,13 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)  static int init_rmode(struct kvm *kvm);  static u64 construct_eptp(unsigned long root_hpa); +static void kvm_cpu_vmxon(u64 addr); +static void kvm_cpu_vmxoff(void);  static DEFINE_PER_CPU(struct vmcs *, vmxarea);  static DEFINE_PER_CPU(struct vmcs *, current_vmcs);  static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu); +static DEFINE_PER_CPU(struct desc_ptr, host_gdt);  static unsigned long *vmx_io_bitmap_a;  static unsigned long *vmx_io_bitmap_b; @@ -231,14 +240,14 @@ static u64 host_efer;  static void ept_save_pdptrs(struct kvm_vcpu *vcpu);  /* - * Keep MSR_K6_STAR at the end, as setup_msrs() will try to optimize it + * Keep MSR_STAR at the end, as setup_msrs() will try to optimize it   * away by decrementing the array size.   */  static const u32 vmx_msr_index[] = {  #ifdef CONFIG_X86_64  	MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,  #endif -	MSR_EFER, MSR_TSC_AUX, MSR_K6_STAR, +	MSR_EFER, MSR_TSC_AUX, MSR_STAR,  };  #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) @@ -334,6 +343,11 @@ static inline bool cpu_has_vmx_ept_1g_page(void)  	return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT;  } +static inline bool cpu_has_vmx_ept_4levels(void) +{ +	return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT; +} +  static inline bool cpu_has_vmx_invept_individual_addr(void)  {  	return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT; @@ -349,6 +363,16 @@ static inline bool cpu_has_vmx_invept_global(void)  	return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT;  } +static inline bool cpu_has_vmx_invvpid_single(void) +{ +	return vmx_capability.vpid & VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT; +} + +static inline bool cpu_has_vmx_invvpid_global(void) +{ +	return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT; +} +  static inline bool cpu_has_vmx_ept(void)  {  	return vmcs_config.cpu_based_2nd_exec_ctrl & @@ -389,6 +413,12 @@ static inline bool cpu_has_virtual_nmis(void)  	return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;  } +static inline bool cpu_has_vmx_wbinvd_exit(void) +{ +	return vmcs_config.cpu_based_2nd_exec_ctrl & +		SECONDARY_EXEC_WBINVD_EXITING; +} +  static inline bool report_flexpriority(void)  {  	return flexpriority_enabled; @@ -453,6 +483,19 @@ static void vmcs_clear(struct vmcs *vmcs)  		       vmcs, phys_addr);  } +static void vmcs_load(struct vmcs *vmcs) +{ +	u64 phys_addr = __pa(vmcs); +	u8 error; + +	asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0" +			: "=g"(error) : "a"(&phys_addr), "m"(phys_addr) +			: "cc", "memory"); +	if (error) +		printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n", +		       vmcs, phys_addr); +} +  static void __vcpu_clear(void *arg)  {  	struct vcpu_vmx *vmx = arg; @@ -475,12 +518,27 @@ static void vcpu_clear(struct vcpu_vmx *vmx)  	smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1);  } -static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx) 
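/*
 * Single-context INVVPID is an optional VMX capability: the helpers
 * below probe the VMX_VPID_EXTENT_* bits first, and vpid_sync_context()
 * falls back to a global flush on processors that implement only the
 * all-context form.
 */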
+static inline void vpid_sync_vcpu_single(struct vcpu_vmx *vmx)  {  	if (vmx->vpid == 0)  		return; -	__invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0); +	if (cpu_has_vmx_invvpid_single()) +		__invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0); +} + +static inline void vpid_sync_vcpu_global(void) +{ +	if (cpu_has_vmx_invvpid_global()) +		__invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0); +} + +static inline void vpid_sync_context(struct vcpu_vmx *vmx) +{ +	if (cpu_has_vmx_invvpid_single()) +		vpid_sync_vcpu_single(vmx); +	else +		vpid_sync_vcpu_global();  }  static inline void ept_sync_global(void) @@ -812,6 +870,9 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)  		wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);  	}  #endif +	if (current_thread_info()->status & TS_USEDFPU) +		clts(); +	load_gdt(&__get_cpu_var(host_gdt));  }  static void vmx_load_host_state(struct vcpu_vmx *vmx) @@ -828,35 +889,30 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx)  static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)  {  	struct vcpu_vmx *vmx = to_vmx(vcpu); -	u64 phys_addr = __pa(vmx->vmcs);  	u64 tsc_this, delta, new_offset; +	u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); -	if (vcpu->cpu != cpu) { +	if (!vmm_exclusive) +		kvm_cpu_vmxon(phys_addr); +	else if (vcpu->cpu != cpu)  		vcpu_clear(vmx); -		kvm_migrate_timers(vcpu); -		set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests); -		local_irq_disable(); -		list_add(&vmx->local_vcpus_link, -			 &per_cpu(vcpus_on_cpu, cpu)); -		local_irq_enable(); -	}  	if (per_cpu(current_vmcs, cpu) != vmx->vmcs) { -		u8 error; -  		per_cpu(current_vmcs, cpu) = vmx->vmcs; -		asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0" -			      : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) -			      : "cc"); -		if (error) -			printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n", -			       vmx->vmcs, phys_addr); +		vmcs_load(vmx->vmcs);  	}  	if (vcpu->cpu != cpu) {  		struct desc_ptr dt;  		unsigned long sysenter_esp; +		kvm_migrate_timers(vcpu); +		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); +		local_irq_disable(); +		list_add(&vmx->local_vcpus_link, +			 &per_cpu(vcpus_on_cpu, cpu)); +		local_irq_enable(); +  		vcpu->cpu = cpu;  		/*  		 * Linux uses per-cpu TSS and GDT, so set these when switching @@ -884,6 +940,10 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)  static void vmx_vcpu_put(struct kvm_vcpu *vcpu)  {  	__vmx_load_host_state(to_vmx(vcpu)); +	if (!vmm_exclusive) { +		__vcpu_clear(to_vmx(vcpu)); +		kvm_cpu_vmxoff(); +	}  }  static void vmx_fpu_activate(struct kvm_vcpu *vcpu) @@ -1057,10 +1117,10 @@ static void setup_msrs(struct vcpu_vmx *vmx)  		if (index >= 0 && vmx->rdtscp_enabled)  			move_msr_up(vmx, index, save_nmsrs++);  		/* -		 * MSR_K6_STAR is only needed on long mode guests, and only +		 * MSR_STAR is only needed on long mode guests, and only  		 * if efer.sce is enabled.  		 
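		 * (STAR holds the segment selector bases used by SYSCALL and
		 * SYSRET, which the CPU only consumes once EFER.SCE enables
		 * those instructions.)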
*/ -		index = __find_msr_index(vmx, MSR_K6_STAR); +		index = __find_msr_index(vmx, MSR_STAR);  		if ((index >= 0) && (vmx->vcpu.arch.efer & EFER_SCE))  			move_msr_up(vmx, index, save_nmsrs++);  	} @@ -1286,6 +1346,13 @@ static __init int vmx_disabled_by_bios(void)  	/* locked but not enabled */  } +static void kvm_cpu_vmxon(u64 addr) +{ +	asm volatile (ASM_VMX_VMXON_RAX +			: : "a"(&addr), "m"(addr) +			: "memory", "cc"); +} +  static int hardware_enable(void *garbage)  {  	int cpu = raw_smp_processor_id(); @@ -1308,11 +1375,13 @@ static int hardware_enable(void *garbage)  		wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);  	}  	write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ -	asm volatile (ASM_VMX_VMXON_RAX -		      : : "a"(&phys_addr), "m"(phys_addr) -		      : "memory", "cc"); -	ept_sync_global(); +	if (vmm_exclusive) { +		kvm_cpu_vmxon(phys_addr); +		ept_sync_global(); +	} + +	store_gdt(&__get_cpu_var(host_gdt));  	return 0;  } @@ -1334,13 +1403,15 @@ static void vmclear_local_vcpus(void)  static void kvm_cpu_vmxoff(void)  {  	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); -	write_cr4(read_cr4() & ~X86_CR4_VMXE);  }  static void hardware_disable(void *garbage)  { -	vmclear_local_vcpus(); -	kvm_cpu_vmxoff(); +	if (vmm_exclusive) { +		vmclear_local_vcpus(); +		kvm_cpu_vmxoff(); +	} +	write_cr4(read_cr4() & ~X86_CR4_VMXE);  }  static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, @@ -1539,7 +1610,8 @@ static __init int hardware_setup(void)  	if (!cpu_has_vmx_vpid())  		enable_vpid = 0; -	if (!cpu_has_vmx_ept()) { +	if (!cpu_has_vmx_ept() || +	    !cpu_has_vmx_ept_4levels()) {  		enable_ept = 0;  		enable_unrestricted_guest = 0;  	} @@ -1628,7 +1700,7 @@ static gva_t rmode_tss_base(struct kvm *kvm)  		gfn_t base_gfn;  		slots = kvm_memslots(kvm); -		base_gfn = kvm->memslots->memslots[0].base_gfn + +		base_gfn = slots->memslots[0].base_gfn +  				 kvm->memslots->memslots[0].npages - 3;  		return base_gfn << PAGE_SHIFT;  	} @@ -1759,9 +1831,12 @@ static void exit_lmode(struct kvm_vcpu *vcpu)  static void vmx_flush_tlb(struct kvm_vcpu *vcpu)  { -	vpid_sync_vcpu_all(to_vmx(vcpu)); -	if (enable_ept) +	vpid_sync_context(to_vmx(vcpu)); +	if (enable_ept) { +		if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) +			return;  		ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); +	}  }  static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) @@ -2507,7 +2582,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)  	vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf);  	vmcs_write32(CR3_TARGET_COUNT, 0);           /* 22.2.1 */ -	vmcs_writel(HOST_CR0, read_cr0());  /* 22.2.3 */ +	vmcs_writel(HOST_CR0, read_cr0() | X86_CR0_TS);  /* 22.2.3 */  	vmcs_writel(HOST_CR4, read_cr4());  /* 22.2.3, 22.2.5 */  	vmcs_writel(HOST_CR3, read_cr3());  /* 22.2.3  FIXME: shadow tables */ @@ -2599,21 +2674,27 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)  static int init_rmode(struct kvm *kvm)  { +	int idx, ret = 0; + +	idx = srcu_read_lock(&kvm->srcu);  	if (!init_rmode_tss(kvm)) -		return 0; +		goto exit;  	if (!init_rmode_identity_map(kvm)) -		return 0; -	return 1; +		goto exit; + +	ret = 1; +exit: +	srcu_read_unlock(&kvm->srcu, idx); +	return ret;  }  static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)  {  	struct vcpu_vmx *vmx = to_vmx(vcpu);  	u64 msr; -	int ret, idx; +	int ret;  	vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); -	idx = srcu_read_lock(&vcpu->kvm->srcu);  	if (!init_rmode(vmx->vcpu.kvm)) {  		ret = -ENOMEM;  		goto out; @@ 
-2630,7 +2711,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)  		msr |= MSR_IA32_APICBASE_BSP;  	kvm_set_apic_base(&vmx->vcpu, msr); -	fx_init(&vmx->vcpu); +	ret = fx_init(&vmx->vcpu); +	if (ret != 0) +		goto out;  	seg_setup(VCPU_SREG_CS);  	/* @@ -2713,7 +2796,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)  	vmx_fpu_activate(&vmx->vcpu);  	update_exception_bitmap(&vmx->vcpu); -	vpid_sync_vcpu_all(vmx); +	vpid_sync_context(vmx);  	ret = 0; @@ -2721,7 +2804,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)  	vmx->emulation_required = 0;  out: -	srcu_read_unlock(&vcpu->kvm->srcu, idx);  	return ret;  } @@ -2826,9 +2908,7 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)  {  	if (!cpu_has_virtual_nmis())  		return to_vmx(vcpu)->soft_vnmi_blocked; -	else -		return !!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & -			  GUEST_INTR_STATE_NMI); +	return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)	& GUEST_INTR_STATE_NMI;  }  static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) @@ -3070,7 +3150,7 @@ static int handle_io(struct kvm_vcpu *vcpu)  	++vcpu->stat.io_exits;  	if (string || in) -		return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO); +		return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE;  	port = exit_qualification >> 16;  	size = (exit_qualification & 7) + 1; @@ -3090,11 +3170,20 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)  	hypercall[2] = 0xc1;  } +static void complete_insn_gp(struct kvm_vcpu *vcpu, int err) +{ +	if (err) +		kvm_inject_gp(vcpu, 0); +	else +		skip_emulated_instruction(vcpu); +} +  static int handle_cr(struct kvm_vcpu *vcpu)  {  	unsigned long exit_qualification, val;  	int cr;  	int reg; +	int err;  	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);  	cr = exit_qualification & 15; @@ -3105,16 +3194,16 @@ static int handle_cr(struct kvm_vcpu *vcpu)  		trace_kvm_cr_write(cr, val);  		switch (cr) {  		case 0: -			kvm_set_cr0(vcpu, val); -			skip_emulated_instruction(vcpu); +			err = kvm_set_cr0(vcpu, val); +			complete_insn_gp(vcpu, err);  			return 1;  		case 3: -			kvm_set_cr3(vcpu, val); -			skip_emulated_instruction(vcpu); +			err = kvm_set_cr3(vcpu, val); +			complete_insn_gp(vcpu, err);  			return 1;  		case 4: -			kvm_set_cr4(vcpu, val); -			skip_emulated_instruction(vcpu); +			err = kvm_set_cr4(vcpu, val); +			complete_insn_gp(vcpu, err);  			return 1;  		case 8: {  				u8 cr8_prev = kvm_get_cr8(vcpu); @@ -3321,30 +3410,25 @@ static int handle_invlpg(struct kvm_vcpu *vcpu)  static int handle_wbinvd(struct kvm_vcpu *vcpu)  {  	skip_emulated_instruction(vcpu); -	/* TODO: Add support for VT-d/pass-through device */ +	kvm_emulate_wbinvd(vcpu);  	return 1;  } -static int handle_apic_access(struct kvm_vcpu *vcpu) +static int handle_xsetbv(struct kvm_vcpu *vcpu)  { -	unsigned long exit_qualification; -	enum emulation_result er; -	unsigned long offset; +	u64 new_bv = kvm_read_edx_eax(vcpu); +	u32 index = kvm_register_read(vcpu, VCPU_REGS_RCX); -	exit_qualification = vmcs_readl(EXIT_QUALIFICATION); -	offset = exit_qualification & 0xffful; - -	er = emulate_instruction(vcpu, 0, 0, 0); - -	if (er !=  EMULATE_DONE) { -		printk(KERN_ERR -		       "Fail to handle apic access vmexit! 
Offset is 0x%lx\n", -		       offset); -		return -ENOEXEC; -	} +	if (kvm_set_xcr(vcpu, index, new_bv) == 0) +		skip_emulated_instruction(vcpu);  	return 1;  } +static int handle_apic_access(struct kvm_vcpu *vcpu) +{ +	return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; +} +  static int handle_task_switch(struct kvm_vcpu *vcpu)  {  	struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -3554,13 +3638,8 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)  			goto out;  		} -		if (err != EMULATE_DONE) { -			vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; -			vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; -			vcpu->run->internal.ndata = 0; -			ret = 0; -			goto out; -		} +		if (err != EMULATE_DONE) +			return 0;  		if (signal_pending(current))  			goto out; @@ -3623,6 +3702,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {  	[EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,  	[EXIT_REASON_APIC_ACCESS]             = handle_apic_access,  	[EXIT_REASON_WBINVD]                  = handle_wbinvd, +	[EXIT_REASON_XSETBV]                  = handle_xsetbv,  	[EXIT_REASON_TASK_SWITCH]             = handle_task_switch,  	[EXIT_REASON_MCE_DURING_VMENTRY]      = handle_machine_check,  	[EXIT_REASON_EPT_VIOLATION]	      = handle_ept_violation, @@ -3656,6 +3736,13 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)  	if (enable_ept && is_paging(vcpu))  		vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); +	if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { +		vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; +		vcpu->run->fail_entry.hardware_entry_failure_reason +			= exit_reason; +		return 0; +	} +  	if (unlikely(vmx->fail)) {  		vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;  		vcpu->run->fail_entry.hardware_entry_failure_reason @@ -3861,11 +3948,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)  	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)  		vmx_set_interrupt_shadow(vcpu, 0); -	/* -	 * Loading guest fpu may have cleared host cr0.ts -	 */ -	vmcs_writel(HOST_CR0, read_cr0()); -  	asm(  		/* Store host registers */  		"push %%"R"dx; push %%"R"bp;" @@ -4001,6 +4083,19 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)  	kmem_cache_free(kvm_vcpu_cache, vmx);  } +static inline void vmcs_init(struct vmcs *vmcs) +{ +	u64 phys_addr = __pa(per_cpu(vmxarea, raw_smp_processor_id())); + +	if (!vmm_exclusive) +		kvm_cpu_vmxon(phys_addr); + +	vmcs_clear(vmcs); + +	if (!vmm_exclusive) +		kvm_cpu_vmxoff(); +} +  static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)  {  	int err; @@ -4026,7 +4121,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)  	if (!vmx->vmcs)  		goto free_msrs; -	vmcs_clear(vmx->vmcs); +	vmcs_init(vmx->vmcs);  	cpu = get_cpu();  	vmx_vcpu_load(&vmx->vcpu, cpu); @@ -4265,6 +4360,8 @@ static struct kvm_x86_ops vmx_x86_ops = {  	.rdtscp_supported = vmx_rdtscp_supported,  	.set_supported_cpuid = vmx_set_supported_cpuid, + +	.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,  };  static int __init vmx_init(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7fa89c39c64f..25f19078b321 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6,6 +6,7 @@   * Copyright (C) 2006 Qumranet, Inc.   * Copyright (C) 2008 Qumranet, Inc.   * Copyright IBM Corporation, 2008 + * Copyright 2010 Red Hat, Inc. and/or its affiliates.   
*   * Authors:   *   Avi Kivity   <avi@qumranet.com> @@ -41,17 +42,19 @@  #include <linux/srcu.h>  #include <linux/slab.h>  #include <linux/perf_event.h> +#include <linux/uaccess.h>  #include <trace/events/kvm.h>  #define CREATE_TRACE_POINTS  #include "trace.h"  #include <asm/debugreg.h> -#include <asm/uaccess.h>  #include <asm/msr.h>  #include <asm/desc.h>  #include <asm/mtrr.h>  #include <asm/mce.h> +#include <asm/i387.h> +#include <asm/xcr.h>  #define MAX_IO_MSRS 256  #define CR0_RESERVED_BITS						\ @@ -62,6 +65,7 @@  	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\  			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE	\  			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR	\ +			  | X86_CR4_OSXSAVE \  			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))  #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) @@ -147,6 +151,13 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {  	{ NULL }  }; +u64 __read_mostly host_xcr0; + +static inline u32 bit(int bitno) +{ +	return 1 << (bitno & 31); +} +  static void kvm_on_user_return(struct user_return_notifier *urn)  {  	unsigned slot; @@ -285,7 +296,7 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,  	prev_nr = vcpu->arch.exception.nr;  	if (prev_nr == DF_VECTOR) {  		/* triple fault -> shutdown */ -		set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); +		kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);  		return;  	}  	class1 = exception_class(prev_nr); @@ -414,121 +425,163 @@ out:  	return changed;  } -void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) +int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)  { +	unsigned long old_cr0 = kvm_read_cr0(vcpu); +	unsigned long update_bits = X86_CR0_PG | X86_CR0_WP | +				    X86_CR0_CD | X86_CR0_NW; +  	cr0 |= X86_CR0_ET;  #ifdef CONFIG_X86_64 -	if (cr0 & 0xffffffff00000000UL) { -		kvm_inject_gp(vcpu, 0); -		return; -	} +	if (cr0 & 0xffffffff00000000UL) +		return 1;  #endif  	cr0 &= ~CR0_RESERVED_BITS; -	if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { -		kvm_inject_gp(vcpu, 0); -		return; -	} +	if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) +		return 1; -	if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) { -		kvm_inject_gp(vcpu, 0); -		return; -	} +	if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) +		return 1;  	if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {  #ifdef CONFIG_X86_64  		if ((vcpu->arch.efer & EFER_LME)) {  			int cs_db, cs_l; -			if (!is_pae(vcpu)) { -				kvm_inject_gp(vcpu, 0); -				return; -			} +			if (!is_pae(vcpu)) +				return 1;  			kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); -			if (cs_l) { -				kvm_inject_gp(vcpu, 0); -				return; - -			} +			if (cs_l) +				return 1;  		} else  #endif -		if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) { -			kvm_inject_gp(vcpu, 0); -			return; -		} - +		if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) +			return 1;  	}  	kvm_x86_ops->set_cr0(vcpu, cr0); -	kvm_mmu_reset_context(vcpu); -	return; +	if ((cr0 ^ old_cr0) & update_bits) +		kvm_mmu_reset_context(vcpu); +	return 0;  }  EXPORT_SYMBOL_GPL(kvm_set_cr0);  void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)  { -	kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f)); +	(void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));  }  EXPORT_SYMBOL_GPL(kvm_lmsw); -void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)  { -	unsigned long old_cr4 = kvm_read_cr4(vcpu); -	unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; +	u64 xcr0; -	if (cr4 & 
CR4_RESERVED_BITS) { +	/* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now  */ +	if (index != XCR_XFEATURE_ENABLED_MASK) +		return 1; +	xcr0 = xcr; +	if (kvm_x86_ops->get_cpl(vcpu) != 0) +		return 1; +	if (!(xcr0 & XSTATE_FP)) +		return 1; +	if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) +		return 1; +	if (xcr0 & ~host_xcr0) +		return 1; +	vcpu->arch.xcr0 = xcr0; +	vcpu->guest_xcr0_loaded = 0; +	return 0; +} + +int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) +{ +	if (__kvm_set_xcr(vcpu, index, xcr)) {  		kvm_inject_gp(vcpu, 0); +		return 1; +	} +	return 0; +} +EXPORT_SYMBOL_GPL(kvm_set_xcr); + +static bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) +{ +	struct kvm_cpuid_entry2 *best; + +	best = kvm_find_cpuid_entry(vcpu, 1, 0); +	return best && (best->ecx & bit(X86_FEATURE_XSAVE)); +} + +static void update_cpuid(struct kvm_vcpu *vcpu) +{ +	struct kvm_cpuid_entry2 *best; + +	best = kvm_find_cpuid_entry(vcpu, 1, 0); +	if (!best)  		return; + +	/* Update OSXSAVE bit */ +	if (cpu_has_xsave && best->function == 0x1) { +		best->ecx &= ~(bit(X86_FEATURE_OSXSAVE)); +		if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) +			best->ecx |= bit(X86_FEATURE_OSXSAVE);  	} +} + +int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +{ +	unsigned long old_cr4 = kvm_read_cr4(vcpu); +	unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; + +	if (cr4 & CR4_RESERVED_BITS) +		return 1; + +	if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE)) +		return 1;  	if (is_long_mode(vcpu)) { -		if (!(cr4 & X86_CR4_PAE)) { -			kvm_inject_gp(vcpu, 0); -			return; -		} +		if (!(cr4 & X86_CR4_PAE)) +			return 1;  	} else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)  		   && ((cr4 ^ old_cr4) & pdptr_bits) -		   && !load_pdptrs(vcpu, vcpu->arch.cr3)) { -		kvm_inject_gp(vcpu, 0); -		return; -	} +		   && !load_pdptrs(vcpu, vcpu->arch.cr3)) +		return 1; + +	if (cr4 & X86_CR4_VMXE) +		return 1; -	if (cr4 & X86_CR4_VMXE) { -		kvm_inject_gp(vcpu, 0); -		return; -	}  	kvm_x86_ops->set_cr4(vcpu, cr4); -	vcpu->arch.cr4 = cr4; -	kvm_mmu_reset_context(vcpu); + +	if ((cr4 ^ old_cr4) & pdptr_bits) +		kvm_mmu_reset_context(vcpu); + +	if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE) +		update_cpuid(vcpu); + +	return 0;  }  EXPORT_SYMBOL_GPL(kvm_set_cr4); -void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) +int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)  {  	if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) {  		kvm_mmu_sync_roots(vcpu);  		kvm_mmu_flush_tlb(vcpu); -		return; +		return 0;  	}  	if (is_long_mode(vcpu)) { -		if (cr3 & CR3_L_MODE_RESERVED_BITS) { -			kvm_inject_gp(vcpu, 0); -			return; -		} +		if (cr3 & CR3_L_MODE_RESERVED_BITS) +			return 1;  	} else {  		if (is_pae(vcpu)) { -			if (cr3 & CR3_PAE_RESERVED_BITS) { -				kvm_inject_gp(vcpu, 0); -				return; -			} -			if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) { -				kvm_inject_gp(vcpu, 0); -				return; -			} +			if (cr3 & CR3_PAE_RESERVED_BITS) +				return 1; +			if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) +				return 1;  		}  		/*  		 * We don't check reserved bits in nonpae mode, because @@ -546,24 +599,28 @@ void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)  	 * to debug) behavior on the guest side.  	 
*/  	if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT))) -		kvm_inject_gp(vcpu, 0); -	else { -		vcpu->arch.cr3 = cr3; -		vcpu->arch.mmu.new_cr3(vcpu); -	} +		return 1; +	vcpu->arch.cr3 = cr3; +	vcpu->arch.mmu.new_cr3(vcpu); +	return 0;  }  EXPORT_SYMBOL_GPL(kvm_set_cr3); -void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) +int __kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)  { -	if (cr8 & CR8_RESERVED_BITS) { -		kvm_inject_gp(vcpu, 0); -		return; -	} +	if (cr8 & CR8_RESERVED_BITS) +		return 1;  	if (irqchip_in_kernel(vcpu->kvm))  		kvm_lapic_set_tpr(vcpu, cr8);  	else  		vcpu->arch.cr8 = cr8; +	return 0; +} + +void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) +{ +	if (__kvm_set_cr8(vcpu, cr8)) +		kvm_inject_gp(vcpu, 0);  }  EXPORT_SYMBOL_GPL(kvm_set_cr8); @@ -576,7 +633,7 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)  }  EXPORT_SYMBOL_GPL(kvm_get_cr8); -int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) +static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)  {  	switch (dr) {  	case 0 ... 3: @@ -585,29 +642,21 @@ int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)  			vcpu->arch.eff_db[dr] = val;  		break;  	case 4: -		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { -			kvm_queue_exception(vcpu, UD_VECTOR); -			return 1; -		} +		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) +			return 1; /* #UD */  		/* fall through */  	case 6: -		if (val & 0xffffffff00000000ULL) { -			kvm_inject_gp(vcpu, 0); -			return 1; -		} +		if (val & 0xffffffff00000000ULL) +			return -1; /* #GP */  		vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;  		break;  	case 5: -		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { -			kvm_queue_exception(vcpu, UD_VECTOR); -			return 1; -		} +		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) +			return 1; /* #UD */  		/* fall through */  	default: /* 7 */ -		if (val & 0xffffffff00000000ULL) { -			kvm_inject_gp(vcpu, 0); -			return 1; -		} +		if (val & 0xffffffff00000000ULL) +			return -1; /* #GP */  		vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;  		if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {  			kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7); @@ -618,28 +667,37 @@ int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)  	return 0;  } + +int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) +{ +	int res; + +	res = __kvm_set_dr(vcpu, dr, val); +	if (res > 0) +		kvm_queue_exception(vcpu, UD_VECTOR); +	else if (res < 0) +		kvm_inject_gp(vcpu, 0); + +	return res; +}  EXPORT_SYMBOL_GPL(kvm_set_dr); -int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) +static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)  {  	switch (dr) {  	case 0 ... 
3:  		*val = vcpu->arch.db[dr];  		break;  	case 4: -		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { -			kvm_queue_exception(vcpu, UD_VECTOR); +		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))  			return 1; -		}  		/* fall through */  	case 6:  		*val = vcpu->arch.dr6;  		break;  	case 5: -		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { -			kvm_queue_exception(vcpu, UD_VECTOR); +		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))  			return 1; -		}  		/* fall through */  	default: /* 7 */  		*val = vcpu->arch.dr7; @@ -648,12 +706,16 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)  	return 0;  } -EXPORT_SYMBOL_GPL(kvm_get_dr); -static inline u32 bit(int bitno) +int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)  { -	return 1 << (bitno & 31); +	if (_kvm_get_dr(vcpu, dr, val)) { +		kvm_queue_exception(vcpu, UD_VECTOR); +		return 1; +	} +	return 0;  } +EXPORT_SYMBOL_GPL(kvm_get_dr);  /*   * List of msr numbers which we expose to userspace through KVM_GET_MSRS @@ -671,7 +733,7 @@ static u32 msrs_to_save[] = {  	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,  	HV_X64_MSR_APIC_ASSIST_PAGE,  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, -	MSR_K6_STAR, +	MSR_STAR,  #ifdef CONFIG_X86_64  	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,  #endif @@ -682,10 +744,14 @@ static unsigned num_msrs_to_save;  static u32 emulated_msrs[] = {  	MSR_IA32_MISC_ENABLE, +	MSR_IA32_MCG_STATUS, +	MSR_IA32_MCG_CTL,  };  static int set_efer(struct kvm_vcpu *vcpu, u64 efer)  { +	u64 old_efer = vcpu->arch.efer; +  	if (efer & efer_reserved_bits)  		return 1; @@ -714,11 +780,13 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer)  	kvm_x86_ops->set_efer(vcpu, efer); -	vcpu->arch.efer = efer; -  	vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;  	kvm_mmu_reset_context(vcpu); +	/* Update reserved bits */ +	if ((efer ^ old_efer) & EFER_NX) +		kvm_mmu_reset_context(vcpu); +  	return 0;  } @@ -882,7 +950,7 @@ static int kvm_request_guest_time_update(struct kvm_vcpu *v)  	if (!vcpu->time_page)  		return 0; -	set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests); +	kvm_make_request(KVM_REQ_KVMCLOCK_UPDATE, v);  	return 1;  } @@ -1524,16 +1592,12 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,  {  	int i, idx; -	vcpu_load(vcpu); -  	idx = srcu_read_lock(&vcpu->kvm->srcu);  	for (i = 0; i < msrs->nmsrs; ++i)  		if (do_msr(vcpu, entries[i].index, &entries[i].data))  			break;  	srcu_read_unlock(&vcpu->kvm->srcu, idx); -	vcpu_put(vcpu); -  	return i;  } @@ -1618,6 +1682,7 @@ int kvm_dev_ioctl_check_extension(long ext)  	case KVM_CAP_PCI_SEGMENT:  	case KVM_CAP_DEBUGREGS:  	case KVM_CAP_X86_ROBUST_SINGLESTEP: +	case KVM_CAP_XSAVE:  		r = 1;  		break;  	case KVM_CAP_COALESCED_MMIO: @@ -1641,6 +1706,9 @@ int kvm_dev_ioctl_check_extension(long ext)  	case KVM_CAP_MCE:  		r = KVM_MAX_MCE_BANKS;  		break; +	case KVM_CAP_XCRS: +		r = cpu_has_xsave; +		break;  	default:  		r = 0;  		break; @@ -1717,8 +1785,28 @@ out:  	return r;  } +static void wbinvd_ipi(void *garbage) +{ +	wbinvd(); +} + +static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu) +{ +	return vcpu->kvm->arch.iommu_domain && +		!(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY); +} +  void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)  { +	/* Address WBINVD may be executed by guest */ +	if (need_emulate_wbinvd(vcpu)) { +		if (kvm_x86_ops->has_wbinvd_exit()) +			cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask); +		else if (vcpu->cpu != -1 && vcpu->cpu != cpu) +			
smp_call_function_single(vcpu->cpu, +					wbinvd_ipi, NULL, 1); +	} +  	kvm_x86_ops->vcpu_load(vcpu, cpu);  	if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) {  		unsigned long khz = cpufreq_quick_get(cpu); @@ -1731,8 +1819,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)  void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)  { -	kvm_put_guest_fpu(vcpu);  	kvm_x86_ops->vcpu_put(vcpu); +	kvm_put_guest_fpu(vcpu);  }  static int is_efer_nx(void) @@ -1781,7 +1869,6 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,  	if (copy_from_user(cpuid_entries, entries,  			   cpuid->nent * sizeof(struct kvm_cpuid_entry)))  		goto out_free; -	vcpu_load(vcpu);  	for (i = 0; i < cpuid->nent; i++) {  		vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function;  		vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; @@ -1799,7 +1886,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,  	r = 0;  	kvm_apic_set_version(vcpu);  	kvm_x86_ops->cpuid_update(vcpu); -	vcpu_put(vcpu); +	update_cpuid(vcpu);  out_free:  	vfree(cpuid_entries); @@ -1820,11 +1907,10 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,  	if (copy_from_user(&vcpu->arch.cpuid_entries, entries,  			   cpuid->nent * sizeof(struct kvm_cpuid_entry2)))  		goto out; -	vcpu_load(vcpu);  	vcpu->arch.cpuid_nent = cpuid->nent;  	kvm_apic_set_version(vcpu);  	kvm_x86_ops->cpuid_update(vcpu); -	vcpu_put(vcpu); +	update_cpuid(vcpu);  	return 0;  out: @@ -1837,7 +1923,6 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,  {  	int r; -	vcpu_load(vcpu);  	r = -E2BIG;  	if (cpuid->nent < vcpu->arch.cpuid_nent)  		goto out; @@ -1849,7 +1934,6 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,  out:  	cpuid->nent = vcpu->arch.cpuid_nent; -	vcpu_put(vcpu);  	return r;  } @@ -1901,13 +1985,13 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,  		0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);  	/* cpuid 1.ecx */  	const u32 kvm_supported_word4_x86_features = -		F(XMM3) | 0 /* Reserved, DTES64, MONITOR */ | +		F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |  		0 /* DS-CPL, VMX, SMX, EST */ |  		0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |  		0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |  		0 /* Reserved, DCA */ | F(XMM4_1) |  		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | -		0 /* Reserved, XSAVE, OSXSAVE */; +		0 /* Reserved, AES */ | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX);  	/* cpuid 0x80000001.ecx */  	const u32 kvm_supported_word6_x86_features =  		F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ | @@ -1922,7 +2006,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,  	switch (function) {  	case 0: -		entry->eax = min(entry->eax, (u32)0xb); +		entry->eax = min(entry->eax, (u32)0xd);  		break;  	case 1:  		entry->edx &= kvm_supported_word0_x86_features; @@ -1980,6 +2064,20 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,  		}  		break;  	} +	case 0xd: { +		int i; + +		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; +		for (i = 1; *nent < maxnent; ++i) { +			if (entry[i - 1].eax == 0 && i != 2) +				break; +			do_cpuid_1_ent(&entry[i], function, i); +			entry[i].flags |= +			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX; +			++*nent; +		} +		break; +	}  	case KVM_CPUID_SIGNATURE: {  		char signature[12] = "KVMKVMKVM\0\0";  		u32 *sigptr = (u32 *)signature; @@ -2081,9 +2179,7 @@ out:  static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,  				    struct kvm_lapic_state *s)  { -	
vcpu_load(vcpu);  	memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); -	vcpu_put(vcpu);  	return 0;  } @@ -2091,11 +2187,9 @@ static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,  static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,  				    struct kvm_lapic_state *s)  { -	vcpu_load(vcpu);  	memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);  	kvm_apic_post_state_restore(vcpu);  	update_cr8_intercept(vcpu); -	vcpu_put(vcpu);  	return 0;  } @@ -2107,20 +2201,15 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,  		return -EINVAL;  	if (irqchip_in_kernel(vcpu->kvm))  		return -ENXIO; -	vcpu_load(vcpu);  	kvm_queue_interrupt(vcpu, irq->irq, false); -	vcpu_put(vcpu); -  	return 0;  }  static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)  { -	vcpu_load(vcpu);  	kvm_inject_nmi(vcpu); -	vcpu_put(vcpu);  	return 0;  } @@ -2140,7 +2229,6 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,  	int r;  	unsigned bank_num = mcg_cap & 0xff, bank; -	vcpu_load(vcpu);  	r = -EINVAL;  	if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)  		goto out; @@ -2155,7 +2243,6 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,  	for (bank = 0; bank < bank_num; bank++)  		vcpu->arch.mce_banks[bank*4] = ~(u64)0;  out: -	vcpu_put(vcpu);  	return r;  } @@ -2188,7 +2275,7 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,  			printk(KERN_DEBUG "kvm: set_mce: "  			       "injects mce exception while "  			       "previous one is in progress!\n"); -			set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); +			kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);  			return 0;  		}  		if (banks[1] & MCI_STATUS_VAL) @@ -2213,8 +2300,6 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,  static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,  					       struct kvm_vcpu_events *events)  { -	vcpu_load(vcpu); -  	events->exception.injected =  		vcpu->arch.exception.pending &&  		!kvm_exception_is_soft(vcpu->arch.exception.nr); @@ -2239,8 +2324,6 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,  	events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING  			 | KVM_VCPUEVENT_VALID_SIPI_VECTOR  			 | KVM_VCPUEVENT_VALID_SHADOW); - -	vcpu_put(vcpu);  }  static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, @@ -2251,8 +2334,6 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,  			      | KVM_VCPUEVENT_VALID_SHADOW))  		return -EINVAL; -	vcpu_load(vcpu); -  	vcpu->arch.exception.pending = events->exception.injected;  	vcpu->arch.exception.nr = events->exception.nr;  	vcpu->arch.exception.has_error_code = events->exception.has_error_code; @@ -2275,22 +2356,16 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,  	if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR)  		vcpu->arch.sipi_vector = events->sipi_vector; -	vcpu_put(vcpu); -  	return 0;  }  static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,  					     struct kvm_debugregs *dbgregs)  { -	vcpu_load(vcpu); -  	memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));  	dbgregs->dr6 = vcpu->arch.dr6;  	dbgregs->dr7 = vcpu->arch.dr7;  	dbgregs->flags = 0; - -	vcpu_put(vcpu);  }  static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, @@ -2299,40 +2374,113 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,  	if (dbgregs->flags)  		return -EINVAL; -	vcpu_load(vcpu); -  	memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));  	vcpu->arch.dr6 = dbgregs->dr6;  	vcpu->arch.dr7 = dbgregs->dr7; -	
vcpu_put(vcpu); +	return 0; +} + +static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, +					 struct kvm_xsave *guest_xsave) +{ +	if (cpu_has_xsave) +		memcpy(guest_xsave->region, +			&vcpu->arch.guest_fpu.state->xsave, +			sizeof(struct xsave_struct)); +	else { +		memcpy(guest_xsave->region, +			&vcpu->arch.guest_fpu.state->fxsave, +			sizeof(struct i387_fxsave_struct)); +		*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] = +			XSTATE_FPSSE; +	} +} + +static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, +					struct kvm_xsave *guest_xsave) +{ +	u64 xstate_bv = +		*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)]; +	if (cpu_has_xsave) +		memcpy(&vcpu->arch.guest_fpu.state->xsave, +			guest_xsave->region, sizeof(struct xsave_struct)); +	else { +		if (xstate_bv & ~XSTATE_FPSSE) +			return -EINVAL; +		memcpy(&vcpu->arch.guest_fpu.state->fxsave, +			guest_xsave->region, sizeof(struct i387_fxsave_struct)); +	}  	return 0;  } +static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu, +					struct kvm_xcrs *guest_xcrs) +{ +	if (!cpu_has_xsave) { +		guest_xcrs->nr_xcrs = 0; +		return; +	} + +	guest_xcrs->nr_xcrs = 1; +	guest_xcrs->flags = 0; +	guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK; +	guest_xcrs->xcrs[0].value = vcpu->arch.xcr0; +} + +static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu, +				       struct kvm_xcrs *guest_xcrs) +{ +	int i, r = 0; + +	if (!cpu_has_xsave) +		return -EINVAL; + +	if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags) +		return -EINVAL; + +	for (i = 0; i < guest_xcrs->nr_xcrs; i++) +		/* Only support XCR0 currently */ +		if (guest_xcrs->xcrs[0].xcr == XCR_XFEATURE_ENABLED_MASK) { +			r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK, +				guest_xcrs->xcrs[0].value); +			break; +		} +	if (r) +		r = -EINVAL; +	return r; +} +  long kvm_arch_vcpu_ioctl(struct file *filp,  			 unsigned int ioctl, unsigned long arg)  {  	struct kvm_vcpu *vcpu = filp->private_data;  	void __user *argp = (void __user *)arg;  	int r; -	struct kvm_lapic_state *lapic = NULL; +	union { +		struct kvm_lapic_state *lapic; +		struct kvm_xsave *xsave; +		struct kvm_xcrs *xcrs; +		void *buffer; +	} u; +	u.buffer = NULL;  	switch (ioctl) {  	case KVM_GET_LAPIC: {  		r = -EINVAL;  		if (!vcpu->arch.apic)  			goto out; -		lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); +		u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);  		r = -ENOMEM; -		if (!lapic) +		if (!u.lapic)  			goto out; -		r = kvm_vcpu_ioctl_get_lapic(vcpu, lapic); +		r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic);  		if (r)  			goto out;  		r = -EFAULT; -		if (copy_to_user(argp, lapic, sizeof(struct kvm_lapic_state))) +		if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state)))  			goto out;  		r = 0;  		break; @@ -2341,14 +2489,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp,  		r = -EINVAL;  		if (!vcpu->arch.apic)  			goto out; -		lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); +		u.lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);  		r = -ENOMEM; -		if (!lapic) +		if (!u.lapic)  			goto out;  		r = -EFAULT; -		if (copy_from_user(lapic, argp, sizeof(struct kvm_lapic_state))) +		if (copy_from_user(u.lapic, argp, sizeof(struct kvm_lapic_state)))  			goto out; -		r = kvm_vcpu_ioctl_set_lapic(vcpu, lapic); +		r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);  		if (r)  			goto out;  		r = 0; @@ -2464,9 +2612,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,  		r = -EFAULT;  		if (copy_from_user(&mce, argp, sizeof mce))  		
	goto out; -		vcpu_load(vcpu);  		r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); -		vcpu_put(vcpu);  		break;  	}  	case KVM_GET_VCPU_EVENTS: { @@ -2513,11 +2659,67 @@ long kvm_arch_vcpu_ioctl(struct file *filp,  		r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);  		break;  	} +	case KVM_GET_XSAVE: { +		u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL); +		r = -ENOMEM; +		if (!u.xsave) +			break; + +		kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave); + +		r = -EFAULT; +		if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave))) +			break; +		r = 0; +		break; +	} +	case KVM_SET_XSAVE: { +		u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL); +		r = -ENOMEM; +		if (!u.xsave) +			break; + +		r = -EFAULT; +		if (copy_from_user(u.xsave, argp, sizeof(struct kvm_xsave))) +			break; + +		r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave); +		break; +	} +	case KVM_GET_XCRS: { +		u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL); +		r = -ENOMEM; +		if (!u.xcrs) +			break; + +		kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs); + +		r = -EFAULT; +		if (copy_to_user(argp, u.xcrs, +				 sizeof(struct kvm_xcrs))) +			break; +		r = 0; +		break; +	} +	case KVM_SET_XCRS: { +		u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL); +		r = -ENOMEM; +		if (!u.xcrs) +			break; + +		r = -EFAULT; +		if (copy_from_user(u.xcrs, argp, +				   sizeof(struct kvm_xcrs))) +			break; + +		r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); +		break; +	}  	default:  		r = -EINVAL;  	}  out: -	kfree(lapic); +	kfree(u.buffer);  	return r;  } @@ -2560,115 +2762,6 @@ static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)  	return kvm->arch.n_alloc_mmu_pages;  } -gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn) -{ -	int i; -	struct kvm_mem_alias *alias; -	struct kvm_mem_aliases *aliases; - -	aliases = kvm_aliases(kvm); - -	for (i = 0; i < aliases->naliases; ++i) { -		alias = &aliases->aliases[i]; -		if (alias->flags & KVM_ALIAS_INVALID) -			continue; -		if (gfn >= alias->base_gfn -		    && gfn < alias->base_gfn + alias->npages) -			return alias->target_gfn + gfn - alias->base_gfn; -	} -	return gfn; -} - -gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) -{ -	int i; -	struct kvm_mem_alias *alias; -	struct kvm_mem_aliases *aliases; - -	aliases = kvm_aliases(kvm); - -	for (i = 0; i < aliases->naliases; ++i) { -		alias = &aliases->aliases[i]; -		if (gfn >= alias->base_gfn -		    && gfn < alias->base_gfn + alias->npages) -			return alias->target_gfn + gfn - alias->base_gfn; -	} -	return gfn; -} - -/* - * Set a new alias region.  Aliases map a portion of physical memory into - * another portion.  This is useful for memory windows, for example the PC - * VGA region. 
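
A note on the new state ioctls above: KVM_GET_XSAVE/KVM_SET_XSAVE hand the guest's raw 4 KiB XSAVE image to userspace, which is what makes AVX state migratable. A minimal sketch of the userspace side — the helper name and error handling are illustrative, only the ioctl names and struct kvm_xsave come from the KVM ABI:

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

/* Hypothetical helper: move one vcpu's XSAVE image to another vcpu. */
static int copy_vcpu_xsave(int src_vcpu_fd, int dst_vcpu_fd)
{
	struct kvm_xsave xsave;			/* raw 4 KiB XSAVE region */

	memset(&xsave, 0, sizeof(xsave));
	if (ioctl(src_vcpu_fd, KVM_GET_XSAVE, &xsave) < 0)
		return -1;
	return ioctl(dst_vcpu_fd, KVM_SET_XSAVE, &xsave);
}
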
- */ -static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, -					 struct kvm_memory_alias *alias) -{ -	int r, n; -	struct kvm_mem_alias *p; -	struct kvm_mem_aliases *aliases, *old_aliases; - -	r = -EINVAL; -	/* General sanity checks */ -	if (alias->memory_size & (PAGE_SIZE - 1)) -		goto out; -	if (alias->guest_phys_addr & (PAGE_SIZE - 1)) -		goto out; -	if (alias->slot >= KVM_ALIAS_SLOTS) -		goto out; -	if (alias->guest_phys_addr + alias->memory_size -	    < alias->guest_phys_addr) -		goto out; -	if (alias->target_phys_addr + alias->memory_size -	    < alias->target_phys_addr) -		goto out; - -	r = -ENOMEM; -	aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); -	if (!aliases) -		goto out; - -	mutex_lock(&kvm->slots_lock); - -	/* invalidate any gfn reference in case of deletion/shrinking */ -	memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases)); -	aliases->aliases[alias->slot].flags |= KVM_ALIAS_INVALID; -	old_aliases = kvm->arch.aliases; -	rcu_assign_pointer(kvm->arch.aliases, aliases); -	synchronize_srcu_expedited(&kvm->srcu); -	kvm_mmu_zap_all(kvm); -	kfree(old_aliases); - -	r = -ENOMEM; -	aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); -	if (!aliases) -		goto out_unlock; - -	memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases)); - -	p = &aliases->aliases[alias->slot]; -	p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; -	p->npages = alias->memory_size >> PAGE_SHIFT; -	p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT; -	p->flags &= ~(KVM_ALIAS_INVALID); - -	for (n = KVM_ALIAS_SLOTS; n > 0; --n) -		if (aliases->aliases[n - 1].npages) -			break; -	aliases->naliases = n; - -	old_aliases = kvm->arch.aliases; -	rcu_assign_pointer(kvm->arch.aliases, aliases); -	synchronize_srcu_expedited(&kvm->srcu); -	kfree(old_aliases); -	r = 0; - -out_unlock: -	mutex_unlock(&kvm->slots_lock); -out: -	return r; -} -  static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)  {  	int r; @@ -2797,7 +2890,6 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,  	struct kvm_memory_slot *memslot;  	unsigned long n;  	unsigned long is_dirty = 0; -	unsigned long *dirty_bitmap = NULL;  	mutex_lock(&kvm->slots_lock); @@ -2812,27 +2904,30 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,  	n = kvm_dirty_bitmap_bytes(memslot); -	r = -ENOMEM; -	dirty_bitmap = vmalloc(n); -	if (!dirty_bitmap) -		goto out; -	memset(dirty_bitmap, 0, n); -  	for (i = 0; !is_dirty && i < n/sizeof(long); i++)  		is_dirty = memslot->dirty_bitmap[i];  	/* If nothing is dirty, don't bother messing with page tables. 
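
The KVM_GET_DIRTY_LOG rework visible in this hunk drops the old preallocated scratch bitmap: when a slot is dirty, a zeroed bitmap is installed in a duplicated memslots array, published with rcu_assign_pointer(), and after synchronize_srcu_expedited() the old bitmap is guaranteed quiescent and can be copied to userspace without further locking. A simplified sketch of that swap-then-copy pattern, assuming KVM's surrounding definitions (not the exact patched code):

static int swap_and_copy_dirty_bitmap(struct kvm *kvm, int slot,
				      unsigned long __user *dest,
				      unsigned long n)
{
	struct kvm_memslots *slots, *old_slots;
	unsigned long *dirty_bitmap;

	dirty_bitmap = vmalloc(n);
	if (!dirty_bitmap)
		return -ENOMEM;
	memset(dirty_bitmap, 0, n);

	slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
	if (!slots) {
		vfree(dirty_bitmap);
		return -ENOMEM;
	}
	memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
	slots->memslots[slot].dirty_bitmap = dirty_bitmap;	/* vcpus now log here */

	old_slots = kvm->memslots;
	rcu_assign_pointer(kvm->memslots, slots);
	synchronize_srcu_expedited(&kvm->srcu);			/* wait out readers */

	dirty_bitmap = old_slots->memslots[slot].dirty_bitmap;	/* now private to us */
	kfree(old_slots);

	if (copy_to_user(dest, dirty_bitmap, n)) {
		vfree(dirty_bitmap);
		return -EFAULT;
	}
	vfree(dirty_bitmap);
	return 0;
}
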
*/  	if (is_dirty) {  		struct kvm_memslots *slots, *old_slots; +		unsigned long *dirty_bitmap;  		spin_lock(&kvm->mmu_lock);  		kvm_mmu_slot_remove_write_access(kvm, log->slot);  		spin_unlock(&kvm->mmu_lock); -		slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); -		if (!slots) -			goto out_free; +		r = -ENOMEM; +		dirty_bitmap = vmalloc(n); +		if (!dirty_bitmap) +			goto out; +		memset(dirty_bitmap, 0, n); +		r = -ENOMEM; +		slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); +		if (!slots) { +			vfree(dirty_bitmap); +			goto out; +		}  		memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));  		slots->memslots[log->slot].dirty_bitmap = dirty_bitmap; @@ -2841,13 +2936,20 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,  		synchronize_srcu_expedited(&kvm->srcu);  		dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap;  		kfree(old_slots); + +		r = -EFAULT; +		if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) { +			vfree(dirty_bitmap); +			goto out; +		} +		vfree(dirty_bitmap); +	} else { +		r = -EFAULT; +		if (clear_user(log->dirty_bitmap, n)) +			goto out;  	}  	r = 0; -	if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) -		r = -EFAULT; -out_free: -	vfree(dirty_bitmap);  out:  	mutex_unlock(&kvm->slots_lock);  	return r; @@ -2867,7 +2969,6 @@ long kvm_arch_vm_ioctl(struct file *filp,  	union {  		struct kvm_pit_state ps;  		struct kvm_pit_state2 ps2; -		struct kvm_memory_alias alias;  		struct kvm_pit_config pit_config;  	} u; @@ -2888,22 +2989,6 @@ long kvm_arch_vm_ioctl(struct file *filp,  			goto out;  		break;  	} -	case KVM_SET_MEMORY_REGION: { -		struct kvm_memory_region kvm_mem; -		struct kvm_userspace_memory_region kvm_userspace_mem; - -		r = -EFAULT; -		if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem)) -			goto out; -		kvm_userspace_mem.slot = kvm_mem.slot; -		kvm_userspace_mem.flags = kvm_mem.flags; -		kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr; -		kvm_userspace_mem.memory_size = kvm_mem.memory_size; -		r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0); -		if (r) -			goto out; -		break; -	}  	case KVM_SET_NR_MMU_PAGES:  		r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);  		if (r) @@ -2912,14 +2997,6 @@ long kvm_arch_vm_ioctl(struct file *filp,  	case KVM_GET_NR_MMU_PAGES:  		r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);  		break; -	case KVM_SET_MEMORY_ALIAS: -		r = -EFAULT; -		if (copy_from_user(&u.alias, argp, sizeof(struct kvm_memory_alias))) -			goto out; -		r = kvm_vm_ioctl_set_memory_alias(kvm, &u.alias); -		if (r) -			goto out; -		break;  	case KVM_CREATE_IRQCHIP: {  		struct kvm_pic *vpic; @@ -3259,7 +3336,7 @@ static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,  		}  		ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);  		if (ret < 0) { -			r = X86EMUL_UNHANDLEABLE; +			r = X86EMUL_IO_NEEDED;  			goto out;  		} @@ -3315,7 +3392,7 @@ static int kvm_write_guest_virt_system(gva_t addr, void *val,  		}  		ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);  		if (ret < 0) { -			r = X86EMUL_UNHANDLEABLE; +			r = X86EMUL_IO_NEEDED;  			goto out;  		} @@ -3330,10 +3407,10 @@ out:  static int emulator_read_emulated(unsigned long addr,  				  void *val,  				  unsigned int bytes, +				  unsigned int *error_code,  				  struct kvm_vcpu *vcpu)  {  	gpa_t                 gpa; -	u32 error_code;  	if (vcpu->mmio_read_completed) {  		memcpy(val, vcpu->mmio_data, bytes); @@ -3343,12 +3420,10 @@ static int emulator_read_emulated(unsigned long addr,  		return X86EMUL_CONTINUE;  	} -	gpa = 
kvm_mmu_gva_to_gpa_read(vcpu, addr, &error_code); +	gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, error_code); -	if (gpa == UNMAPPED_GVA) { -		kvm_inject_page_fault(vcpu, addr, error_code); +	if (gpa == UNMAPPED_GVA)  		return X86EMUL_PROPAGATE_FAULT; -	}  	/* For APIC access vmexit */  	if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) @@ -3370,11 +3445,12 @@ mmio:  	trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);  	vcpu->mmio_needed = 1; -	vcpu->mmio_phys_addr = gpa; -	vcpu->mmio_size = bytes; -	vcpu->mmio_is_write = 0; +	vcpu->run->exit_reason = KVM_EXIT_MMIO; +	vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; +	vcpu->run->mmio.len = vcpu->mmio_size = bytes; +	vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0; -	return X86EMUL_UNHANDLEABLE; +	return X86EMUL_IO_NEEDED;  }  int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, @@ -3392,17 +3468,15 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,  static int emulator_write_emulated_onepage(unsigned long addr,  					   const void *val,  					   unsigned int bytes, +					   unsigned int *error_code,  					   struct kvm_vcpu *vcpu)  {  	gpa_t                 gpa; -	u32 error_code; -	gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, &error_code); +	gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error_code); -	if (gpa == UNMAPPED_GVA) { -		kvm_inject_page_fault(vcpu, addr, error_code); +	if (gpa == UNMAPPED_GVA)  		return X86EMUL_PROPAGATE_FAULT; -	}  	/* For APIC access vmexit */  	if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) @@ -3420,10 +3494,11 @@ mmio:  		return X86EMUL_CONTINUE;  	vcpu->mmio_needed = 1; -	vcpu->mmio_phys_addr = gpa; -	vcpu->mmio_size = bytes; -	vcpu->mmio_is_write = 1; -	memcpy(vcpu->mmio_data, val, bytes); +	vcpu->run->exit_reason = KVM_EXIT_MMIO; +	vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; +	vcpu->run->mmio.len = vcpu->mmio_size = bytes; +	vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1; +	memcpy(vcpu->run->mmio.data, val, bytes);  	return X86EMUL_CONTINUE;  } @@ -3431,6 +3506,7 @@ mmio:  int emulator_write_emulated(unsigned long addr,  			    const void *val,  			    unsigned int bytes, +			    unsigned int *error_code,  			    struct kvm_vcpu *vcpu)  {  	/* Crossing a page boundary? 
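
For the split loop below, note the idiom now = -addr & ~PAGE_MASK: since ~PAGE_MASK is PAGE_SIZE - 1, this yields the number of bytes from addr up to the next page boundary, i.e. the size of the first chunk of a straddling write. A standalone demonstration with a made-up address:

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_MASK	(~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long addr = 0x1ffd;		/* 3 bytes short of a page boundary */
	unsigned long now = -addr & ~PAGE_MASK;	/* bytes left in addr's page */

	printf("%lu\n", now);			/* prints 3 */
	return 0;
}
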
*/ @@ -3438,16 +3514,17 @@ int emulator_write_emulated(unsigned long addr,  		int rc, now;  		now = -addr & ~PAGE_MASK; -		rc = emulator_write_emulated_onepage(addr, val, now, vcpu); +		rc = emulator_write_emulated_onepage(addr, val, now, error_code, +						     vcpu);  		if (rc != X86EMUL_CONTINUE)  			return rc;  		addr += now;  		val += now;  		bytes -= now;  	} -	return emulator_write_emulated_onepage(addr, val, bytes, vcpu); +	return emulator_write_emulated_onepage(addr, val, bytes, error_code, +					       vcpu);  } -EXPORT_SYMBOL_GPL(emulator_write_emulated);  #define CMPXCHG_TYPE(t, ptr, old, new) \  	(cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old)) @@ -3463,6 +3540,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,  				     const void *old,  				     const void *new,  				     unsigned int bytes, +				     unsigned int *error_code,  				     struct kvm_vcpu *vcpu)  {  	gpa_t gpa; @@ -3484,6 +3562,10 @@ static int emulator_cmpxchg_emulated(unsigned long addr,  		goto emul_write;  	page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); +	if (is_error_page(page)) { +		kvm_release_page_clean(page); +		goto emul_write; +	}  	kaddr = kmap_atomic(page, KM_USER0);  	kaddr += offset_in_page(gpa); @@ -3516,7 +3598,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,  emul_write:  	printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); -	return emulator_write_emulated(addr, new, bytes, vcpu); +	return emulator_write_emulated(addr, new, bytes, error_code, vcpu);  }  static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) @@ -3604,42 +3686,38 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)  	return X86EMUL_CONTINUE;  } -int emulate_clts(struct kvm_vcpu *vcpu) +int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)  { -	kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); -	kvm_x86_ops->fpu_activate(vcpu); +	if (!need_emulate_wbinvd(vcpu)) +		return X86EMUL_CONTINUE; + +	if (kvm_x86_ops->has_wbinvd_exit()) { +		smp_call_function_many(vcpu->arch.wbinvd_dirty_mask, +				wbinvd_ipi, NULL, 1); +		cpumask_clear(vcpu->arch.wbinvd_dirty_mask); +	} +	wbinvd();  	return X86EMUL_CONTINUE;  } +EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); -int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) +int emulate_clts(struct kvm_vcpu *vcpu)  { -	return kvm_get_dr(ctxt->vcpu, dr, dest); +	kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); +	kvm_x86_ops->fpu_activate(vcpu); +	return X86EMUL_CONTINUE;  } -int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) +int emulator_get_dr(int dr, unsigned long *dest, struct kvm_vcpu *vcpu)  { -	unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? 
~0ULL : ~0U; - -	return kvm_set_dr(ctxt->vcpu, dr, value & mask); +	return _kvm_get_dr(vcpu, dr, dest);  } -void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) +int emulator_set_dr(int dr, unsigned long value, struct kvm_vcpu *vcpu)  { -	u8 opcodes[4]; -	unsigned long rip = kvm_rip_read(vcpu); -	unsigned long rip_linear; - -	if (!printk_ratelimit()) -		return; -	rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); - -	kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu, NULL); - -	printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", -	       context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); +	return __kvm_set_dr(vcpu, dr, value);  } -EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);  static u64 mk_cr_64(u64 curr_cr, u32 new_val)  { @@ -3674,27 +3752,32 @@ static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu)  	return value;  } -static void emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) +static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu)  { +	int res = 0; +  	switch (cr) {  	case 0: -		kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); +		res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));  		break;  	case 2:  		vcpu->arch.cr2 = val;  		break;  	case 3: -		kvm_set_cr3(vcpu, val); +		res = kvm_set_cr3(vcpu, val);  		break;  	case 4: -		kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); +		res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));  		break;  	case 8: -		kvm_set_cr8(vcpu, val & 0xfUL); +		res = __kvm_set_cr8(vcpu, val & 0xfUL);  		break;  	default:  		vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); +		res = -1;  	} + +	return res;  }  static int emulator_get_cpl(struct kvm_vcpu *vcpu) @@ -3707,6 +3790,12 @@ static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu)  	kvm_x86_ops->get_gdt(vcpu, dt);  } +static unsigned long emulator_get_cached_segment_base(int seg, +						      struct kvm_vcpu *vcpu) +{ +	return get_segment_base(vcpu, seg); +} +  static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg,  					   struct kvm_vcpu *vcpu)  { @@ -3779,11 +3868,6 @@ static void emulator_set_segment_selector(u16 sel, int seg,  	kvm_set_segment(vcpu, &kvm_seg, seg);  } -static void emulator_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) -{ -	kvm_x86_ops->set_rflags(vcpu, rflags); -} -  static struct x86_emulate_ops emulate_ops = {  	.read_std            = kvm_read_guest_virt_system,  	.write_std           = kvm_write_guest_virt_system, @@ -3797,11 +3881,15 @@ static struct x86_emulate_ops emulate_ops = {  	.set_cached_descriptor = emulator_set_cached_descriptor,  	.get_segment_selector = emulator_get_segment_selector,  	.set_segment_selector = emulator_set_segment_selector, +	.get_cached_segment_base = emulator_get_cached_segment_base,  	.get_gdt             = emulator_get_gdt,  	.get_cr              = emulator_get_cr,  	.set_cr              = emulator_set_cr,  	.cpl                 = emulator_get_cpl, -	.set_rflags          = emulator_set_rflags, +	.get_dr              = emulator_get_dr, +	.set_dr              = emulator_set_dr, +	.set_msr             = kvm_set_msr, +	.get_msr             = kvm_get_msr,  };  static void cache_all_regs(struct kvm_vcpu *vcpu) @@ -3812,14 +3900,75 @@ static void cache_all_regs(struct kvm_vcpu *vcpu)  	vcpu->arch.regs_dirty = ~0;  } +static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) +{ +	u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, 
mask);
+	/*
+	 * an sti; sti; sequence only disables interrupts for the first
+	 * instruction. So, if the last instruction, be it emulated or
+	 * not, left the system with the INT_STI flag enabled, it
+	 * means that the last instruction is an sti. We should not
+	 * leave the flag on in this case. The same goes for mov ss
+	 */
+	if (!(int_shadow & mask))
+		kvm_x86_ops->set_interrupt_shadow(vcpu, mask);
+}
+
+static void inject_emulated_exception(struct kvm_vcpu *vcpu)
+{
+	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
+	if (ctxt->exception == PF_VECTOR)
+		kvm_inject_page_fault(vcpu, ctxt->cr2, ctxt->error_code);
+	else if (ctxt->error_code_valid)
+		kvm_queue_exception_e(vcpu, ctxt->exception, ctxt->error_code);
+	else
+		kvm_queue_exception(vcpu, ctxt->exception);
+}
+
+static int handle_emulation_failure(struct kvm_vcpu *vcpu)
+{
+	++vcpu->stat.insn_emulation_fail;
+	trace_kvm_emulate_insn_failed(vcpu);
+	vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+	vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+	vcpu->run->internal.ndata = 0;
+	kvm_queue_exception(vcpu, UD_VECTOR);
+	return EMULATE_FAIL;
+}
+
+static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
+{
+	gpa_t gpa;
+
+	if (tdp_enabled)
+		return false;
+
+	/*
+	 * if emulation was due to an access to a shadowed page table
+	 * and it failed, try to unshadow the page and re-enter the
+	 * guest to let the CPU execute the instruction.
+	 */
+	if (kvm_mmu_unprotect_page_virt(vcpu, gva))
+		return true;
+
+	gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL);
+
+	if (gpa == UNMAPPED_GVA)
+		return true; /* let cpu generate fault */
+
+	if (!kvm_is_error_hva(gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT)))
+		return true;
+
+	return false;
+}
+
 int emulate_instruction(struct kvm_vcpu *vcpu,
 			unsigned long cr2,
 			u16 error_code,
 			int emulation_type)
 {
-	int r, shadow_mask;
-	struct decode_cache *c;
-	struct kvm_run *run = vcpu->run;
+	int r;
+	struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
 	kvm_clear_exception_queue(vcpu);
 	vcpu->arch.mmio_fault_cr2 = cr2;
@@ -3831,8 +3980,6 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 	 */
 	cache_all_regs(vcpu);
-	vcpu->mmio_is_write = 0;
-
 	if (!(emulation_type & EMULTYPE_NO_DECODE)) {
 		int cs_db, cs_l;
 		kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
@@ -3846,13 +3993,16 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 			? X86EMUL_MODE_VM86 : cs_l
 			? X86EMUL_MODE_PROT64 :	cs_db
 			? 
X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
+		memset(c, 0, sizeof(struct decode_cache));
+		memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
+		vcpu->arch.emulate_ctxt.interruptibility = 0;
+		vcpu->arch.emulate_ctxt.exception = -1;
 		r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
 		trace_kvm_emulate_insn_start(vcpu);
 		/* Only allow emulation of specific instructions on #UD
 		 * (namely VMMCALL, sysenter, sysexit, syscall) */
-		c = &vcpu->arch.emulate_ctxt.decode;
 		if (emulation_type & EMULTYPE_TRAP_UD) {
 			if (!c->twobyte)
 				return EMULATE_FAIL;
@@ -3880,11 +4030,11 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 		++vcpu->stat.insn_emulation;
 		if (r)  {
-			++vcpu->stat.insn_emulation_fail;
-			trace_kvm_emulate_insn_failed(vcpu);
-			if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
+			if (reexecute_instruction(vcpu, cr2))
 				return EMULATE_DONE;
-			return EMULATE_FAIL;
+			if (emulation_type & EMULTYPE_SKIP)
+				return EMULATE_FAIL;
+			return handle_emulation_failure(vcpu);
 		}
 	}
@@ -3893,48 +4043,42 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 		return EMULATE_DONE;
 	}
+	/* this is needed for the VMware backdoor interface to work, since it
+	   changes register values during IO operations */
+	memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
+
 restart:
 	r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
-	shadow_mask = vcpu->arch.emulate_ctxt.interruptibility;
-	if (r == 0)
-		kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask);
+	if (r) { /* emulation failed */
+		if (reexecute_instruction(vcpu, cr2))
+			return EMULATE_DONE;
-	if (vcpu->arch.pio.count) {
-		if (!vcpu->arch.pio.in)
-			vcpu->arch.pio.count = 0;
-		return EMULATE_DO_MMIO;
+		return handle_emulation_failure(vcpu);
 	}
-	if (r || vcpu->mmio_is_write) {
-		run->exit_reason = KVM_EXIT_MMIO;
-		run->mmio.phys_addr = vcpu->mmio_phys_addr;
-		memcpy(run->mmio.data, vcpu->mmio_data, 8);
-		run->mmio.len = vcpu->mmio_size;
-		run->mmio.is_write = vcpu->mmio_is_write;
+	toggle_interruptibility(vcpu, vcpu->arch.emulate_ctxt.interruptibility);
+	kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
+	memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
+	kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
+
+	if (vcpu->arch.emulate_ctxt.exception >= 0) {
+		inject_emulated_exception(vcpu);
+		return EMULATE_DONE;
 	}
-	if (r) {
-		if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
-			goto done;
-		if (!vcpu->mmio_needed) {
-			++vcpu->stat.insn_emulation_fail;
-			trace_kvm_emulate_insn_failed(vcpu);
-			kvm_report_emulation_failure(vcpu, "mmio");
-			return EMULATE_FAIL;
-		}
+	if (vcpu->arch.pio.count) {
+		if (!vcpu->arch.pio.in)
+			vcpu->arch.pio.count = 0;
 		return EMULATE_DO_MMIO;
 	}
-	if (vcpu->mmio_is_write) {
-		vcpu->mmio_needed = 0;
+	if (vcpu->mmio_needed) {
+		if (vcpu->mmio_is_write)
+			vcpu->mmio_needed = 0;
 		return EMULATE_DO_MMIO;
 	}
-done:
-	if (vcpu->arch.exception.pending)
-		vcpu->arch.emulate_ctxt.restart = false;
-
 	if (vcpu->arch.emulate_ctxt.restart)
 		goto restart;
@@ -4108,6 +4252,9 @@ int kvm_arch_init(void *opaque)
 	perf_register_guest_info_callbacks(&kvm_guest_cbs);
+	if (cpu_has_xsave)
+		host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+
 	return 0;
 out:
@@ -4270,7 +4417,7 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->patch_hypercall(vcpu, instruction);
-	return emulator_write_emulated(rip, instruction, 3, vcpu);
+	return emulator_write_emulated(rip, instruction, 3, NULL, vcpu);
 }
 void realmode_lgdt(struct kvm_vcpu *vcpu, 
u16 limit, unsigned long base) @@ -4506,59 +4653,78 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)  	}  } +static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu) +{ +	if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) && +			!vcpu->guest_xcr0_loaded) { +		/* kvm_set_xcr() also depends on this */ +		xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0); +		vcpu->guest_xcr0_loaded = 1; +	} +} + +static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) +{ +	if (vcpu->guest_xcr0_loaded) { +		if (vcpu->arch.xcr0 != host_xcr0) +			xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0); +		vcpu->guest_xcr0_loaded = 0; +	} +} +  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)  {  	int r;  	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&  		vcpu->run->request_interrupt_window; -	if (vcpu->requests) -		if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) -			kvm_mmu_unload(vcpu); - -	r = kvm_mmu_reload(vcpu); -	if (unlikely(r)) -		goto out; -  	if (vcpu->requests) { -		if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) +		if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) +			kvm_mmu_unload(vcpu); +		if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))  			__kvm_migrate_timers(vcpu); -		if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests)) +		if (kvm_check_request(KVM_REQ_KVMCLOCK_UPDATE, vcpu))  			kvm_write_guest_time(vcpu); -		if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests)) +		if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))  			kvm_mmu_sync_roots(vcpu); -		if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) +		if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))  			kvm_x86_ops->tlb_flush(vcpu); -		if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, -				       &vcpu->requests)) { +		if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {  			vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;  			r = 0;  			goto out;  		} -		if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) { +		if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {  			vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;  			r = 0;  			goto out;  		} -		if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) { +		if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) {  			vcpu->fpu_active = 0;  			kvm_x86_ops->fpu_deactivate(vcpu);  		}  	} +	r = kvm_mmu_reload(vcpu); +	if (unlikely(r)) +		goto out; +  	preempt_disable();  	kvm_x86_ops->prepare_guest_switch(vcpu);  	if (vcpu->fpu_active)  		kvm_load_guest_fpu(vcpu); +	kvm_load_guest_xcr0(vcpu); -	local_irq_disable(); +	atomic_set(&vcpu->guest_mode, 1); +	smp_wmb(); -	clear_bit(KVM_REQ_KICK, &vcpu->requests); -	smp_mb__after_clear_bit(); +	local_irq_disable(); -	if (vcpu->requests || need_resched() || signal_pending(current)) { -		set_bit(KVM_REQ_KICK, &vcpu->requests); +	if (!atomic_read(&vcpu->guest_mode) || vcpu->requests +	    || need_resched() || signal_pending(current)) { +		atomic_set(&vcpu->guest_mode, 0); +		smp_wmb();  		local_irq_enable();  		preempt_enable();  		r = 1; @@ -4603,7 +4769,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)  	if (hw_breakpoint_active())  		hw_breakpoint_restore(); -	set_bit(KVM_REQ_KICK, &vcpu->requests); +	atomic_set(&vcpu->guest_mode, 0); +	smp_wmb();  	local_irq_enable();  	++vcpu->stat.exits; @@ -4665,7 +4832,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)  			srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);  			kvm_vcpu_block(vcpu);  			vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); -			if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) +			if (kvm_check_request(KVM_REQ_UNHALT, 
vcpu))  			{  				switch(vcpu->arch.mp_state) {  				case KVM_MP_STATE_HALTED: @@ -4717,8 +4884,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)  	int r;  	sigset_t sigsaved; -	vcpu_load(vcpu); -  	if (vcpu->sigset_active)  		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); @@ -4743,7 +4908,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)  		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);  		r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE);  		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); -		if (r == EMULATE_DO_MMIO) { +		if (r != EMULATE_DONE) {  			r = 0;  			goto out;  		} @@ -4759,14 +4924,11 @@ out:  	if (vcpu->sigset_active)  		sigprocmask(SIG_SETMASK, &sigsaved, NULL); -	vcpu_put(vcpu);  	return r;  }  int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)  { -	vcpu_load(vcpu); -  	regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);  	regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);  	regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX); @@ -4789,15 +4951,11 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)  	regs->rip = kvm_rip_read(vcpu);  	regs->rflags = kvm_get_rflags(vcpu); -	vcpu_put(vcpu); -  	return 0;  }  int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)  { -	vcpu_load(vcpu); -  	kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);  	kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);  	kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx); @@ -4822,8 +4980,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)  	vcpu->arch.exception.pending = false; -	vcpu_put(vcpu); -  	return 0;  } @@ -4842,8 +4998,6 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,  {  	struct desc_ptr dt; -	vcpu_load(vcpu); -  	kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);  	kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);  	kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES); @@ -4875,32 +5029,27 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,  		set_bit(vcpu->arch.interrupt.nr,  			(unsigned long *)sregs->interrupt_bitmap); -	vcpu_put(vcpu); -  	return 0;  }  int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,  				    struct kvm_mp_state *mp_state)  { -	vcpu_load(vcpu);  	mp_state->mp_state = vcpu->arch.mp_state; -	vcpu_put(vcpu);  	return 0;  }  int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,  				    struct kvm_mp_state *mp_state)  { -	vcpu_load(vcpu);  	vcpu->arch.mp_state = mp_state->mp_state; -	vcpu_put(vcpu);  	return 0;  }  int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,  		    bool has_error_code, u32 error_code)  { +	struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;  	int cs_db, cs_l, ret;  	cache_all_regs(vcpu); @@ -4915,6 +5064,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,  		? X86EMUL_MODE_VM86 : cs_l  		? X86EMUL_MODE_PROT64 :	cs_db  		? 
X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; +	memset(c, 0, sizeof(struct decode_cache)); +	memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);  	ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, &emulate_ops,  				   tss_selector, reason, has_error_code, @@ -4923,6 +5074,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,  	if (ret)  		return EMULATE_FAIL; +	memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); +	kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);  	kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);  	return EMULATE_DONE;  } @@ -4935,8 +5088,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,  	int pending_vec, max_bits;  	struct desc_ptr dt; -	vcpu_load(vcpu); -  	dt.size = sregs->idt.limit;  	dt.address = sregs->idt.base;  	kvm_x86_ops->set_idt(vcpu, &dt); @@ -4996,8 +5147,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,  	    !is_protmode(vcpu))  		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; -	vcpu_put(vcpu); -  	return 0;  } @@ -5007,12 +5156,10 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,  	unsigned long rflags;  	int i, r; -	vcpu_load(vcpu); -  	if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {  		r = -EBUSY;  		if (vcpu->arch.exception.pending) -			goto unlock_out; +			goto out;  		if (dbg->control & KVM_GUESTDBG_INJECT_DB)  			kvm_queue_exception(vcpu, DB_VECTOR);  		else @@ -5054,34 +5201,12 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,  	r = 0; -unlock_out: -	vcpu_put(vcpu); +out:  	return r;  }  /* - * fxsave fpu state.  Taken from x86_64/processor.h.  To be killed when - * we have asm/x86/processor.h - */ -struct fxsave { -	u16	cwd; -	u16	swd; -	u16	twd; -	u16	fop; -	u64	rip; -	u64	rdp; -	u32	mxcsr; -	u32	mxcsr_mask; -	u32	st_space[32];	/* 8*16 bytes for each FP-reg = 128 bytes */ -#ifdef CONFIG_X86_64 -	u32	xmm_space[64];	/* 16*16 bytes for each XMM-reg = 256 bytes */ -#else -	u32	xmm_space[32];	/* 8*16 bytes for each XMM-reg = 128 bytes */ -#endif -}; - -/*   * Translate a guest virtual address to a guest physical address.   
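
The handler that follows implements the KVM_TRANSLATE vcpu ioctl, which resolves one guest-virtual address through the guest's paging structures. The userspace side looks roughly like this (hypothetical helper; error handling trimmed; the ioctl name and struct kvm_translation are the long-standing KVM ABI):

#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>

/* Hypothetical helper: ask KVM to translate one guest-virtual address. */
static void translate_gva(int vcpu_fd, unsigned long long gva)
{
	struct kvm_translation tr = { .linear_address = gva };

	if (ioctl(vcpu_fd, KVM_TRANSLATE, &tr) == 0 && tr.valid)
		printf("gva 0x%llx -> gpa 0x%llx\n", gva,
		       (unsigned long long)tr.physical_address);
	else
		printf("gva 0x%llx is unmapped\n", gva);
}
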
*/  int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, @@ -5091,7 +5216,6 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,  	gpa_t gpa;  	int idx; -	vcpu_load(vcpu);  	idx = srcu_read_lock(&vcpu->kvm->srcu);  	gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);  	srcu_read_unlock(&vcpu->kvm->srcu, idx); @@ -5099,16 +5223,14 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,  	tr->valid = gpa != UNMAPPED_GVA;  	tr->writeable = 1;  	tr->usermode = 0; -	vcpu_put(vcpu);  	return 0;  }  int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)  { -	struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image; - -	vcpu_load(vcpu); +	struct i387_fxsave_struct *fxsave = +			&vcpu->arch.guest_fpu.state->fxsave;  	memcpy(fpu->fpr, fxsave->st_space, 128);  	fpu->fcw = fxsave->cwd; @@ -5119,16 +5241,13 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)  	fpu->last_dp = fxsave->rdp;  	memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space); -	vcpu_put(vcpu); -  	return 0;  }  int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)  { -	struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image; - -	vcpu_load(vcpu); +	struct i387_fxsave_struct *fxsave = +			&vcpu->arch.guest_fpu.state->fxsave;  	memcpy(fxsave->st_space, fpu->fpr, 128);  	fxsave->cwd = fpu->fcw; @@ -5139,61 +5258,63 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)  	fxsave->rdp = fpu->last_dp;  	memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space); -	vcpu_put(vcpu); -  	return 0;  } -void fx_init(struct kvm_vcpu *vcpu) +int fx_init(struct kvm_vcpu *vcpu)  { -	unsigned after_mxcsr_mask; +	int err; + +	err = fpu_alloc(&vcpu->arch.guest_fpu); +	if (err) +		return err; + +	fpu_finit(&vcpu->arch.guest_fpu);  	/* -	 * Touch the fpu the first time in non atomic context as if -	 * this is the first fpu instruction the exception handler -	 * will fire before the instruction returns and it'll have to -	 * allocate ram with GFP_KERNEL. +	 * Ensure guest xcr0 is valid for loading  	 */ -	if (!used_math()) -		kvm_fx_save(&vcpu->arch.host_fx_image); - -	/* Initialize guest FPU by resetting ours and saving into guest's */ -	preempt_disable(); -	kvm_fx_save(&vcpu->arch.host_fx_image); -	kvm_fx_finit(); -	kvm_fx_save(&vcpu->arch.guest_fx_image); -	kvm_fx_restore(&vcpu->arch.host_fx_image); -	preempt_enable(); +	vcpu->arch.xcr0 = XSTATE_FP;  	vcpu->arch.cr0 |= X86_CR0_ET; -	after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space); -	vcpu->arch.guest_fx_image.mxcsr = 0x1f80; -	memset((void *)&vcpu->arch.guest_fx_image + after_mxcsr_mask, -	       0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask); + +	return 0;  }  EXPORT_SYMBOL_GPL(fx_init); +static void fx_free(struct kvm_vcpu *vcpu) +{ +	fpu_free(&vcpu->arch.guest_fpu); +} +  void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)  {  	if (vcpu->guest_fpu_loaded)  		return; +	/* +	 * Restore all possible states in the guest, +	 * and assume host would use all available bits. +	 * Guest xcr0 would be loaded later. 
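
XCR0, read and written with xgetbv()/xsetbv() in the hunks above, selects which XSAVE feature states (x87, SSE, AVX, ...) are architecturally enabled. For readers who have not met it, a standalone userspace probe — it assumes an XSAVE-capable CPU with CR4.OSXSAVE set; the .byte sequence is the XGETBV encoding:

#include <stdint.h>
#include <stdio.h>

static uint64_t xgetbv0(void)
{
	uint32_t eax, edx;

	/* XGETBV with %ecx == 0 reads XCR0; it faults with #UD unless the
	 * OS has set CR4.OSXSAVE (any XSAVE-enabled kernel does). */
	__asm__ volatile(".byte 0x0f, 0x01, 0xd0"
			 : "=a" (eax), "=d" (edx) : "c" (0));
	return ((uint64_t)edx << 32) | eax;
}

int main(void)
{
	printf("XCR0 = %#llx\n", (unsigned long long)xgetbv0());
	return 0;	/* bit 0 = x87, bit 1 = SSE, bit 2 = AVX */
}
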
+ */
+	kvm_put_guest_xcr0(vcpu);
 	vcpu->guest_fpu_loaded = 1;
-	kvm_fx_save(&vcpu->arch.host_fx_image);
-	kvm_fx_restore(&vcpu->arch.guest_fx_image);
+	unlazy_fpu(current);
+	fpu_restore_checking(&vcpu->arch.guest_fpu);
 	trace_kvm_fpu(1);
 }
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 {
+	kvm_put_guest_xcr0(vcpu);
+
 	if (!vcpu->guest_fpu_loaded)
 		return;
 	vcpu->guest_fpu_loaded = 0;
-	kvm_fx_save(&vcpu->arch.guest_fx_image);
-	kvm_fx_restore(&vcpu->arch.host_fx_image);
+	fpu_save_init(&vcpu->arch.guest_fpu);
 	++vcpu->stat.fpu_reload;
-	set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests);
+	kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
 	trace_kvm_fpu(0);
 }
@@ -5204,6 +5325,8 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 		vcpu->arch.time_page = NULL;
 	}
+	free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
+	fx_free(vcpu);
 	kvm_x86_ops->vcpu_free(vcpu);
 }
@@ -5217,9 +5340,6 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
 	int r;
-	/* We do fxsave: this must be aligned. */
-	BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF);
-
 	vcpu->arch.mtrr_state.have_fixed = 1;
 	vcpu_load(vcpu);
 	r = kvm_arch_vcpu_reset(vcpu);
@@ -5241,6 +5361,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 	kvm_mmu_unload(vcpu);
 	vcpu_put(vcpu);
+	fx_free(vcpu);
 	kvm_x86_ops->vcpu_free(vcpu);
 }
@@ -5334,7 +5455,12 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	}
 	vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
+	if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
+		goto fail_free_mce_banks;
+
 	return 0;
+fail_free_mce_banks:
+	kfree(vcpu->arch.mce_banks);
 fail_free_lapic:
 	kvm_free_lapic(vcpu);
 fail_mmu_destroy:
@@ -5364,12 +5490,6 @@ struct  kvm *kvm_arch_create_vm(void)
 	if (!kvm)
 		return ERR_PTR(-ENOMEM);
-	kvm->arch.aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
-	if (!kvm->arch.aliases) {
-		kfree(kvm);
-		return ERR_PTR(-ENOMEM);
-	}
-
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
 	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
@@ -5412,12 +5532,12 @@ static void kvm_free_vcpus(struct kvm *kvm)
 void kvm_arch_sync_events(struct kvm *kvm)
 {
 	kvm_free_all_assigned_devices(kvm);
+	kvm_free_pit(kvm);
 }
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
 	kvm_iommu_unmap_guest(kvm);
-	kvm_free_pit(kvm);
 	kfree(kvm->arch.vpic);
 	kfree(kvm->arch.vioapic);
 	kvm_free_vcpus(kvm);
@@ -5427,7 +5547,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	if (kvm->arch.ept_identity_pagetable)
 		put_page(kvm->arch.ept_identity_pagetable);
 	cleanup_srcu_struct(&kvm->srcu);
-	kfree(kvm->arch.aliases);
 	kfree(kvm);
 }
@@ -5438,6 +5557,11 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				int user_alloc)
 {
 	int npages = memslot->npages;
+	int map_flags = MAP_PRIVATE | MAP_ANONYMOUS;
+
+	/* Prevent internal slot pages from being moved by fork()/COW. */
+	if (memslot->id >= KVM_MEMORY_SLOTS)
+		map_flags = MAP_SHARED | MAP_ANONYMOUS;
+
 	/* To keep backward compatibility with older userspace,
 	 * x86 needs to handle the !user_alloc case. 
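
The map_flags change above (used by the do_mmap() call in the next hunk) leans on a basic property of anonymous mappings: MAP_SHARED pages keep one identity across fork(), while MAP_PRIVATE pages are copied on write, which would silently divorce KVM's internal slots from the pages the guest mappings point at. A standalone illustration of the property:

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_SHARED | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return 1;
	strcpy(p, "before");
	if (fork() == 0) {
		strcpy(p, "after");	/* with MAP_PRIVATE the parent would
					   never see this write */
		_exit(0);
	}
	wait(NULL);
	printf("%s\n", p);		/* prints "after" */
	return 0;
}
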
@@ -5450,7 +5574,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,  			userspace_addr = do_mmap(NULL, 0,  						 npages * PAGE_SIZE,  						 PROT_READ | PROT_WRITE, -						 MAP_PRIVATE | MAP_ANONYMOUS, +						 map_flags,  						 0);  			up_write(¤t->mm->mmap_sem); @@ -5523,7 +5647,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)  	me = get_cpu();  	if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) -		if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests)) +		if (atomic_xchg(&vcpu->guest_mode, 0))  			smp_send_reschedule(cpu);  	put_cpu();  } diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index f4b54458285b..b7a404722d2b 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -65,13 +65,6 @@ static inline int is_paging(struct kvm_vcpu *vcpu)  	return kvm_read_cr0_bits(vcpu, X86_CR0_PG);  } -static inline struct kvm_mem_aliases *kvm_aliases(struct kvm *kvm) -{ -	return rcu_dereference_check(kvm->arch.aliases, -			srcu_read_lock_held(&kvm->srcu) -			|| lockdep_is_held(&kvm->slots_lock)); -} -  void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);  void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index f871e04b6965..e10cf070ede0 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -30,6 +30,7 @@ ifeq ($(CONFIG_X86_32),y)          lib-y += checksum_32.o          lib-y += strstr_32.o          lib-y += semaphore_32.o string_32.o +        lib-y += cmpxchg.o  ifneq ($(CONFIG_X86_CMPXCHG64),y)          lib-y += cmpxchg8b_emu.o atomic64_386_32.o  endif diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S index 4a5979aa6883..2cda60a06e65 100644 --- a/arch/x86/lib/atomic64_386_32.S +++ b/arch/x86/lib/atomic64_386_32.S @@ -25,150 +25,172 @@  	CFI_ADJUST_CFA_OFFSET -4  .endm -.macro BEGIN func reg -$v = \reg - -ENTRY(atomic64_\func\()_386) -	CFI_STARTPROC -	LOCK $v - -.macro RETURN -	UNLOCK $v +#define BEGIN(op) \ +.macro endp; \ +	CFI_ENDPROC; \ +ENDPROC(atomic64_##op##_386); \ +.purgem endp; \ +.endm; \ +ENTRY(atomic64_##op##_386); \ +	CFI_STARTPROC; \ +	LOCK v; + +#define ENDP endp + +#define RET \ +	UNLOCK v; \  	ret -.endm - -.macro END_ -	CFI_ENDPROC -ENDPROC(atomic64_\func\()_386) -.purgem RETURN -.purgem END_ -.purgem END -.endm - -.macro END -RETURN -END_ -.endm -.endm -BEGIN read %ecx -	movl  ($v), %eax -	movl 4($v), %edx -END - -BEGIN set %esi -	movl %ebx,  ($v) -	movl %ecx, 4($v) -END - -BEGIN xchg %esi -	movl  ($v), %eax -	movl 4($v), %edx -	movl %ebx,  ($v) -	movl %ecx, 4($v) -END - -BEGIN add %ecx -	addl %eax,  ($v) -	adcl %edx, 4($v) -END - -BEGIN add_return %ecx -	addl  ($v), %eax -	adcl 4($v), %edx -	movl %eax,  ($v) -	movl %edx, 4($v) -END - -BEGIN sub %ecx -	subl %eax,  ($v) -	sbbl %edx, 4($v) -END - -BEGIN sub_return %ecx +#define RET_ENDP \ +	RET; \ +	ENDP + +#define v %ecx +BEGIN(read) +	movl  (v), %eax +	movl 4(v), %edx +RET_ENDP +#undef v + +#define v %esi +BEGIN(set) +	movl %ebx,  (v) +	movl %ecx, 4(v) +RET_ENDP +#undef v + +#define v  %esi +BEGIN(xchg) +	movl  (v), %eax +	movl 4(v), %edx +	movl %ebx,  (v) +	movl %ecx, 4(v) +RET_ENDP +#undef v + +#define v %ecx +BEGIN(add) +	addl %eax,  (v) +	adcl %edx, 4(v) +RET_ENDP +#undef v + +#define v %ecx +BEGIN(add_return) +	addl  (v), %eax +	adcl 4(v), %edx +	movl %eax,  (v) +	movl %edx, 4(v) +RET_ENDP +#undef v + +#define v %ecx +BEGIN(sub) +	subl %eax,  (v) +	sbbl %edx, 4(v) +RET_ENDP +#undef v + +#define v %ecx +BEGIN(sub_return)  	negl %edx  	negl %eax  	sbbl $0, %edx -	addl  ($v), %eax -	adcl 4($v), %edx -	movl 
%eax,  ($v) -	movl %edx, 4($v) -END - -BEGIN inc %esi -	addl $1,  ($v) -	adcl $0, 4($v) -END - -BEGIN inc_return %esi -	movl  ($v), %eax -	movl 4($v), %edx +	addl  (v), %eax +	adcl 4(v), %edx +	movl %eax,  (v) +	movl %edx, 4(v) +RET_ENDP +#undef v + +#define v %esi +BEGIN(inc) +	addl $1,  (v) +	adcl $0, 4(v) +RET_ENDP +#undef v + +#define v %esi +BEGIN(inc_return) +	movl  (v), %eax +	movl 4(v), %edx  	addl $1, %eax  	adcl $0, %edx -	movl %eax,  ($v) -	movl %edx, 4($v) -END - -BEGIN dec %esi -	subl $1,  ($v) -	sbbl $0, 4($v) -END - -BEGIN dec_return %esi -	movl  ($v), %eax -	movl 4($v), %edx +	movl %eax,  (v) +	movl %edx, 4(v) +RET_ENDP +#undef v + +#define v %esi +BEGIN(dec) +	subl $1,  (v) +	sbbl $0, 4(v) +RET_ENDP +#undef v + +#define v %esi +BEGIN(dec_return) +	movl  (v), %eax +	movl 4(v), %edx  	subl $1, %eax  	sbbl $0, %edx -	movl %eax,  ($v) -	movl %edx, 4($v) -END +	movl %eax,  (v) +	movl %edx, 4(v) +RET_ENDP +#undef v -BEGIN add_unless %ecx +#define v %ecx +BEGIN(add_unless)  	addl %eax, %esi  	adcl %edx, %edi -	addl  ($v), %eax -	adcl 4($v), %edx +	addl  (v), %eax +	adcl 4(v), %edx  	cmpl %eax, %esi  	je 3f  1: -	movl %eax,  ($v) -	movl %edx, 4($v) +	movl %eax,  (v) +	movl %edx, 4(v)  	movl $1, %eax  2: -RETURN +	RET  3:  	cmpl %edx, %edi  	jne 1b  	xorl %eax, %eax  	jmp 2b -END_ +ENDP +#undef v -BEGIN inc_not_zero %esi -	movl  ($v), %eax -	movl 4($v), %edx +#define v %esi +BEGIN(inc_not_zero) +	movl  (v), %eax +	movl 4(v), %edx  	testl %eax, %eax  	je 3f  1:  	addl $1, %eax  	adcl $0, %edx -	movl %eax,  ($v) -	movl %edx, 4($v) +	movl %eax,  (v) +	movl %edx, 4(v)  	movl $1, %eax  2: -RETURN +	RET  3:  	testl %edx, %edx  	jne 1b  	jmp 2b -END_ +ENDP +#undef v -BEGIN dec_if_positive %esi -	movl  ($v), %eax -	movl 4($v), %edx +#define v %esi +BEGIN(dec_if_positive) +	movl  (v), %eax +	movl 4(v), %edx  	subl $1, %eax  	sbbl $0, %edx  	js 1f -	movl %eax,  ($v) -	movl %edx, 4($v) +	movl %eax,  (v) +	movl %edx, 4(v)  1: -END +RET_ENDP +#undef v diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S index ebeafcce04a9..aa4326bfb24a 100644 --- a/arch/x86/lib/clear_page_64.S +++ b/arch/x86/lib/clear_page_64.S @@ -52,7 +52,7 @@ ENDPROC(clear_page)  	.align 8  	.quad clear_page  	.quad 1b -	.byte X86_FEATURE_REP_GOOD +	.word X86_FEATURE_REP_GOOD  	.byte .Lclear_page_end - clear_page  	.byte 2b - 1b  	.previous diff --git a/arch/x86/kernel/cpu/cmpxchg.c b/arch/x86/lib/cmpxchg.c index 2056ccf572cc..5d619f6df3ee 100644 --- a/arch/x86/kernel/cpu/cmpxchg.c +++ b/arch/x86/lib/cmpxchg.c @@ -52,21 +52,3 @@ unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new)  }  EXPORT_SYMBOL(cmpxchg_386_u32);  #endif - -#ifndef CONFIG_X86_CMPXCHG64 -unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new) -{ -	u64 prev; -	unsigned long flags; - -	/* Poor man's cmpxchg8b for 386 and 486. 
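
For reference, the contract this software fallback provides — return the old value, and store the new one only when the old value matched — is exactly what the hardware instruction gives on later CPUs. A userspace demonstration with GCC's builtin (illustrative only, not kernel code):

#include <stdio.h>

int main(void)
{
	unsigned long v = 42, old;

	old = __sync_val_compare_and_swap(&v, 42, 99);	/* matches: stores 99 */
	printf("old=%lu v=%lu\n", old, v);		/* old=42 v=99 */

	old = __sync_val_compare_and_swap(&v, 42, 7);	/* stale: no store */
	printf("old=%lu v=%lu\n", old, v);		/* old=99 v=99 */
	return 0;
}
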
Unsuitable for SMP */ -	local_irq_save(flags); -	prev = *(u64 *)ptr; -	if (prev == old) -		*(u64 *)ptr = new; -	local_irq_restore(flags); -	return prev; -} -EXPORT_SYMBOL(cmpxchg_486_u64); -#endif - diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S index 727a5d46d2fc..6fec2d1cebe1 100644 --- a/arch/x86/lib/copy_page_64.S +++ b/arch/x86/lib/copy_page_64.S @@ -113,7 +113,7 @@ ENDPROC(copy_page)  	.align 8  	.quad copy_page  	.quad 1b -	.byte X86_FEATURE_REP_GOOD +	.word X86_FEATURE_REP_GOOD  	.byte .Lcopy_page_end - copy_page  	.byte 2b - 1b  	.previous diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 71100c98e337..a460158b5ac5 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -29,7 +29,7 @@  	.align 8  	.quad  0b  	.quad  2b -	.byte  \feature			/* when feature is set */ +	.word  \feature			/* when feature is set */  	.byte  5  	.byte  5  	.previous diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index f82e884928af..bcbcd1e0f7d5 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -131,7 +131,7 @@ ENDPROC(__memcpy)  	.align 8  	.quad memcpy  	.quad .Lmemcpy_c -	.byte X86_FEATURE_REP_GOOD +	.word X86_FEATURE_REP_GOOD  	/*  	 * Replace only beginning, memcpy is used to apply alternatives, diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index e88d3b81644a..09d344269652 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -121,7 +121,7 @@ ENDPROC(__memset)  	.align 8  	.quad memset  	.quad .Lmemset_c -	.byte X86_FEATURE_REP_GOOD +	.word X86_FEATURE_REP_GOOD  	.byte .Lfinal - memset  	.byte .Lmemset_e - .Lmemset_c  	.previous diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index a725b7f760ae..0002a3a33081 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -37,6 +37,28 @@ struct addr_marker {  	const char *name;  }; +/* indices for address_markers; keep sync'd w/ address_markers below */ +enum address_markers_idx { +	USER_SPACE_NR = 0, +#ifdef CONFIG_X86_64 +	KERNEL_SPACE_NR, +	LOW_KERNEL_NR, +	VMALLOC_START_NR, +	VMEMMAP_START_NR, +	HIGH_KERNEL_NR, +	MODULES_VADDR_NR, +	MODULES_END_NR, +#else +	KERNEL_SPACE_NR, +	VMALLOC_START_NR, +	VMALLOC_END_NR, +# ifdef CONFIG_HIGHMEM +	PKMAP_BASE_NR, +# endif +	FIXADDR_START_NR, +#endif +}; +  /* Address space markers hints */  static struct addr_marker address_markers[] = {  	{ 0, "User Space" }, @@ -331,14 +353,12 @@ static int pt_dump_init(void)  #ifdef CONFIG_X86_32  	/* Not a compile-time constant on x86-32 */ -	address_markers[2].start_address = VMALLOC_START; -	address_markers[3].start_address = VMALLOC_END; +	address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; +	address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;  # ifdef CONFIG_HIGHMEM -	address_markers[4].start_address = PKMAP_BASE; -	address_markers[5].start_address = FIXADDR_START; -# else -	address_markers[4].start_address = FIXADDR_START; +	address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;  # endif +	address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;  #endif  	pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL, diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index f62777940dfb..4c4508e8a204 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -802,8 +802,10 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,  	up_read(&mm->mmap_sem);  	/* Kernel mode? 
Handle exceptions or die: */ -	if (!(error_code & PF_USER)) +	if (!(error_code & PF_USER)) {  		no_context(regs, error_code, address); +		return; +	}  	/* User-space => ok to do another page fault: */  	if (is_prefetch(regs, error_code, address)) diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 63a6ba66cbe0..5e8fa12ef861 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -53,7 +53,7 @@ void *kmap_atomic(struct page *page, enum km_type type)  	return kmap_atomic_prot(page, type, kmap_prot);  } -void kunmap_atomic(void *kvaddr, enum km_type type) +void kunmap_atomic_notypecheck(void *kvaddr, enum km_type type)  {  	unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;  	enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); @@ -102,7 +102,7 @@ struct page *kmap_atomic_to_page(void *ptr)  EXPORT_SYMBOL(kmap);  EXPORT_SYMBOL(kunmap);  EXPORT_SYMBOL(kmap_atomic); -EXPORT_SYMBOL(kunmap_atomic); +EXPORT_SYMBOL(kunmap_atomic_notypecheck);  EXPORT_SYMBOL(kmap_atomic_prot);  EXPORT_SYMBOL(kmap_atomic_to_page); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index ee41bba315d1..9a6674689a20 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -2,7 +2,7 @@   *  linux/arch/x86_64/mm/init.c   *   *  Copyright (C) 1995  Linus Torvalds - *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz> + *  Copyright (C) 2000  Pavel Machek <pavel@ucw.cz>   *  Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>   */ diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 12e4d2d3c110..3ba6e0608c55 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -62,8 +62,8 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size,  static void __iomem *__ioremap_caller(resource_size_t phys_addr,  		unsigned long size, unsigned long prot_val, void *caller)  { -	unsigned long pfn, offset, vaddr; -	resource_size_t last_addr; +	unsigned long offset, vaddr; +	resource_size_t pfn, last_pfn, last_addr;  	const resource_size_t unaligned_phys_addr = phys_addr;  	const unsigned long unaligned_size = size;  	struct vm_struct *area; @@ -100,10 +100,8 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,  	/*  	 * Don't allow anybody to remap normal RAM that we're using..  	 */ -	for (pfn = phys_addr >> PAGE_SHIFT; -				(pfn << PAGE_SHIFT) < (last_addr & PAGE_MASK); -				pfn++) { - +	last_pfn = last_addr >> PAGE_SHIFT; +	for (pfn = phys_addr >> PAGE_SHIFT; pfn <= last_pfn; pfn++) {  		int is_ram = page_is_ram(pfn);  		if (is_ram && pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn))) @@ -115,7 +113,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,  	 * Mappings have to be page-aligned  	 */  	offset = phys_addr & ~PAGE_MASK; -	phys_addr &= PAGE_MASK; +	phys_addr &= PHYSICAL_PAGE_MASK;  	size = PAGE_ALIGN(last_addr+1) - phys_addr;  	retval = reserve_memtype(phys_addr, (u64)phys_addr + size, @@ -613,7 +611,7 @@ void __init early_iounmap(void __iomem *addr, unsigned long size)  		return;  	}  	offset = virt_addr & ~PAGE_MASK; -	nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT; +	nrpages = PAGE_ALIGN(offset + size) >> PAGE_SHIFT;  	idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot;  	while (nrpages > 0) { diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 5d0e67fff1a6..e5d5e2ce9f77 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -45,6 +45,8 @@ struct kmmio_fault_page {  	 * Protected by kmmio_lock, when linked into kmmio_page_table.  	 
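
The kmmio hunk below adds a scheduled_for_release flag so that a fault page is never pushed onto the singly linked release list twice; a double add would make the list point back into itself and corrupt the deferred-release walk. The guard reduced to its essentials (simplified types, not the kernel structures):

struct fault_page {
	struct fault_page *release_next;
	int scheduled_for_release;		/* the new guard */
};

/* Push at most once; callers may legitimately try to schedule the same
 * page again before the RCU-deferred release has run. */
static void schedule_release(struct fault_page *f,
			     struct fault_page **release_list)
{
	if (f->scheduled_for_release)
		return;
	f->release_next = *release_list;
	*release_list = f;
	f->scheduled_for_release = 1;
}
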
*/  	int count; + +	bool scheduled_for_release;  };  struct kmmio_delayed_release { @@ -398,8 +400,11 @@ static void release_kmmio_fault_page(unsigned long page,  	BUG_ON(f->count < 0);  	if (!f->count) {  		disarm_kmmio_fault_page(f); -		f->release_next = *release_list; -		*release_list = f; +		if (!f->scheduled_for_release) { +			f->release_next = *release_list; +			*release_list = f; +			f->scheduled_for_release = true; +		}  	}  } @@ -471,8 +476,10 @@ static void remove_kmmio_fault_pages(struct rcu_head *head)  			prevp = &f->release_next;  		} else {  			*prevp = f->release_next; +			f->release_next = NULL; +			f->scheduled_for_release = false;  		} -		f = f->release_next; +		f = *prevp;  	}  	spin_unlock_irqrestore(&kmmio_lock, flags); @@ -510,6 +517,9 @@ void unregister_kmmio_probe(struct kmmio_probe *p)  	kmmio_count--;  	spin_unlock_irqrestore(&kmmio_lock, flags); +	if (!release_list) +		return; +  	drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);  	if (!drelease) {  		pr_crit("leaking kmmio_fault_page objects.\n"); diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 64121a18b8cb..f6ff57b7efa5 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -158,7 +158,7 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type)  	return req_type;  } -static int pat_pagerange_is_ram(unsigned long start, unsigned long end) +static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end)  {  	int ram_page = 0, not_rampage = 0;  	unsigned long page_nr; diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c index 308e32570d84..38e6d174c497 100644 --- a/arch/x86/mm/pf_in.c +++ b/arch/x86/mm/pf_in.c @@ -40,16 +40,16 @@ static unsigned char prefix_codes[] = {  static unsigned int reg_rop[] = {  	0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F  }; -static unsigned int reg_wop[] = { 0x88, 0x89 }; +static unsigned int reg_wop[] = { 0x88, 0x89, 0xAA, 0xAB };  static unsigned int imm_wop[] = { 0xC6, 0xC7 };  /* IA32 Manual 3, 3-432*/ -static unsigned int rw8[] = { 0x88, 0x8A, 0xC6 }; +static unsigned int rw8[] = { 0x88, 0x8A, 0xC6, 0xAA };  static unsigned int rw32[] = { -	0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F +	0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F, 0xAB  }; -static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F }; +static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F, 0xAA };  static unsigned int mw16[] = { 0xB70F, 0xBF0F }; -static unsigned int mw32[] = { 0x89, 0x8B, 0xC7 }; +static unsigned int mw32[] = { 0x89, 0x8B, 0xC7, 0xAB };  static unsigned int mw64[] = {};  #else /* not __i386__ */  static unsigned char prefix_codes[] = { @@ -63,20 +63,20 @@ static unsigned char prefix_codes[] = {  static unsigned int reg_rop[] = {  	0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F  }; -static unsigned int reg_wop[] = { 0x88, 0x89 }; +static unsigned int reg_wop[] = { 0x88, 0x89, 0xAA, 0xAB };  static unsigned int imm_wop[] = { 0xC6, 0xC7 }; -static unsigned int rw8[] = { 0xC6, 0x88, 0x8A }; +static unsigned int rw8[] = { 0xC6, 0x88, 0x8A, 0xAA };  static unsigned int rw32[] = { -	0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F +	0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F, 0xAB  };  /* 8 bit only */ -static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F }; +static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F, 0xAA };  /* 16 bit only */  static unsigned int mw16[] = { 0xB70F, 0xBF0F };  /* 16 or 32 bit */  static unsigned int mw32[] = { 0xC7 };  /* 16, 32 or 64 bit */ -static unsigned int mw64[] = { 0x89, 
0x8B }; +static unsigned int mw64[] = { 0x89, 0x8B, 0xAB };  #endif /* not __i386__ */  struct prefix_bits { @@ -410,7 +410,6 @@ static unsigned long *get_reg_w32(int no, struct pt_regs *regs)  unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)  {  	unsigned int opcode; -	unsigned char mod_rm;  	int reg;  	unsigned char *p;  	struct prefix_bits prf; @@ -437,8 +436,13 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)  	goto err;  do_work: -	mod_rm = *p; -	reg = ((mod_rm >> 3) & 0x7) | (prf.rexr << 3); +	/* for STOS, source register is fixed */ +	if (opcode == 0xAA || opcode == 0xAB) { +		reg = arg_AX; +	} else { +		unsigned char mod_rm = *p; +		reg = ((mod_rm >> 3) & 0x7) | (prf.rexr << 3); +	}  	switch (get_ins_reg_width(ins_addr)) {  	case 1:  		return *get_reg_w8(reg, prf.rex, regs); diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index 8565d944f7cf..38868adf07ea 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c @@ -90,6 +90,27 @@ static void do_test(unsigned long size)  	iounmap(p);  } +/* + * Tests how mmiotrace behaves in face of multiple ioremap / iounmaps in + * a short time. We had a bug in deferred freeing procedure which tried + * to free this region multiple times (ioremap can reuse the same address + * for many mappings). + */ +static void do_test_bulk_ioremapping(void) +{ +	void __iomem *p; +	int i; + +	for (i = 0; i < 10; ++i) { +		p = ioremap_nocache(mmio_address, PAGE_SIZE); +		if (p) +			iounmap(p); +	} + +	/* Force freeing. If it will crash we will know why. */ +	synchronize_rcu(); +} +  static int __init init(void)  {  	unsigned long size = (read_far) ? (8 << 20) : (16 << 10); @@ -104,6 +125,7 @@ static int __init init(void)  		   "and writing 16 kB of rubbish in there.\n",  		   size >> 10, mmio_address);  	do_test(size); +	do_test_bulk_ioremapping();  	pr_info("All done.\n");  	return 0;  } diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 426f3a1a64d3..c03f14ab6667 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -278,11 +278,9 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)  static void do_flush_tlb_all(void *info)  { -	unsigned long cpu = smp_processor_id(); -  	__flush_tlb_all();  	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) -		leave_mm(cpu); +		leave_mm(smp_processor_id());  }  void flush_tlb_all(void) diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index b28d2f1253bb..f6b48f6c5951 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -634,6 +634,18 @@ static int __init ppro_init(char **cpu_type)  	if (force_arch_perfmon && cpu_has_arch_perfmon)  		return 0; +	/* +	 * Documentation on identifying Intel processors by CPU family +	 * and model can be found in the Intel Software Developer's +	 * Manuals (SDM): +	 * +	 *  http://www.intel.com/products/processor/manuals/ +	 * +	 * As of May 2010 the documentation for this was in the: +	 * "Intel 64 and IA-32 Architectures Software Developer's +	 * Manual Volume 3B: System Programming Guide", "Table B-1 +	 * CPUID Signature Values of DisplayFamily_DisplayModel". +	 */  	switch (cpu_model) {  	case 0 ... 
2:  		*cpu_type = "i386/ppro"; @@ -655,12 +667,13 @@ static int __init ppro_init(char **cpu_type)  	case 15: case 23:  		*cpu_type = "i386/core_2";  		break; +	case 0x1a: +	case 0x1e:  	case 0x2e: -	case 26:  		spec = &op_arch_perfmon_spec;  		*cpu_type = "i386/core_i7";  		break; -	case 28: +	case 0x1c:  		*cpu_type = "i386/atom";  		break;  	default: diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 2ec04c424a62..15466c096ba5 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -34,6 +34,15 @@ static const struct dmi_system_id pci_use_crs_table[] __initconst = {  			DMI_MATCH(DMI_PRODUCT_NAME, "x3800"),  		},  	}, +	/* https://bugzilla.kernel.org/show_bug.cgi?id=16007 */ +	/* 2006 AMD HT/VIA system with two host bridges */ +        { +		.callback = set_use_crs, +		.ident = "ASRock ALiveSATA2-GLAN", +		.matches = { +			DMI_MATCH(DMI_PRODUCT_NAME, "ALiveSATA2-GLAN"), +                }, +        },  	{}  }; diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 215a27ae050d..a0772af64efb 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -125,6 +125,23 @@ void __init dmi_check_skip_isa_align(void)  static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev)  {  	struct resource *rom_r = &dev->resource[PCI_ROM_RESOURCE]; +	struct resource *bar_r; +	int bar; + +	if (pci_probe & PCI_NOASSIGN_BARS) { +		/* +		* If the BIOS did not assign the BAR, zero out the +		* resource so the kernel doesn't attempt to assign +		* it later on in pci_assign_unassigned_resources +		*/ +		for (bar = 0; bar <= PCI_STD_RESOURCE_END; bar++) { +			bar_r = &dev->resource[bar]; +			if (bar_r->start == 0 && bar_r->end != 0) { +				bar_r->flags = 0; +				bar_r->end = 0; +			} +		} +	}  	if (pci_probe & PCI_NOASSIGN_ROMS) {  		if (rom_r->parent) @@ -509,6 +526,9 @@ char * __devinit  pcibios_setup(char *str)  	} else if (!strcmp(str, "norom")) {  		pci_probe |= PCI_NOASSIGN_ROMS;  		return NULL; +	} else if (!strcmp(str, "nobar")) { +		pci_probe |= PCI_NOASSIGN_BARS; +		return NULL;  	} else if (!strcmp(str, "assign-busses")) {  		pci_probe |= PCI_ASSIGN_ALL_BUSSES;  		return NULL; diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 9810a0f76c91..f547ee05f715 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -989,7 +989,7 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)  	dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n", msg, 'A' + pin - 1, irq);  	/* Update IRQ for all devices with the same pirq value */ -	while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) { +	for_each_pci_dev(dev2) {  		pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin);  		if (!pin)  			continue; @@ -1028,7 +1028,7 @@ void __init pcibios_fixup_irqs(void)  	u8 pin;  	DBG(KERN_DEBUG "PCI: IRQ fixup\n"); -	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { +	for_each_pci_dev(dev) {  		/*  		 * If the BIOS has set an out of range IRQ number, just  		 * ignore it.
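The pci_get_device() loop rewrites in irq.c lean on the for_each_pci_dev() helper, which expands to the same iteration (definition as in include/linux/pci.h when this helper was introduced):

	/* dev must start out NULL; pci_get_device() releases the previous
	 * device's reference on each pass, so no manual pci_dev_put() is
	 * needed unless the loop body breaks out early */
	#define for_each_pci_dev(d) \
		while ((d = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, d)) != NULL)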
Also keep track of which IRQ's are @@ -1052,7 +1052,7 @@ void __init pcibios_fixup_irqs(void)  		return;  	dev = NULL; -	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { +	for_each_pci_dev(dev) {  		pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);  		if (!pin)  			continue; diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index 8d460eaf524f..c89266be6048 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c @@ -36,7 +36,7 @@ int __init pci_legacy_init(void)  	return 0;  } -void pcibios_scan_specific_bus(int busn) +void __devinit pcibios_scan_specific_bus(int busn)  {  	int devfn;  	long node; diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 1290ba54b350..e7e8c5f54956 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -4,7 +4,7 @@   * Distribute under GPLv2   *   * Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl> - * Copyright (c) 2002 Pavel Machek <pavel@suse.cz> + * Copyright (c) 2002 Pavel Machek <pavel@ucw.cz>   * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>   */ diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index d24f983ba1e5..460f314d13e5 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -4,7 +4,7 @@   * Distribute under GPLv2   *   * Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl> - * Copyright (c) 2002 Pavel Machek <pavel@suse.cz> + * Copyright (c) 2002 Pavel Machek <pavel@ucw.cz>   * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>   */ diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 6b4ffedb93c9..4a2afa1bac51 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -120,7 +120,8 @@ $(obj)/vdso32-syms.lds: $(vdso32.so-y:%=$(obj)/vdso32-%-syms.lds) FORCE  quiet_cmd_vdso = VDSO    $@        cmd_vdso = $(CC) -nostdlib -o $@ \  		       $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \ -		       -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) +		       -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \ +		 sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'  VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)  GCOV_PROFILE := n diff --git a/arch/x86/vdso/checkundef.sh b/arch/x86/vdso/checkundef.sh new file mode 100755 index 000000000000..7ee90a9b549d --- /dev/null +++ b/arch/x86/vdso/checkundef.sh @@ -0,0 +1,10 @@ +#!/bin/sh +nm="$1" +file="$2" +$nm "$file" | grep '^ *U' > /dev/null 2>&1 +if [ $? 
-eq 1 ]; then +    exit 0 +else +    echo "$file: undefined symbols found" >&2 +    exit 1 +fi diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 02b442e92007..36df991985b2 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -374,7 +374,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)  #ifdef CONFIG_X86_64 -__initcall(sysenter_setup); +subsys_initcall(sysenter_setup);  #ifdef CONFIG_SYSCTL  /* Register vsyscall32 into the ABI table */ diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index ac74869b8140..4b5d26f108bb 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -67,6 +67,7 @@ static int __init init_vdso_vars(void)  	*(typeof(__ ## x) **) var_ref(VDSO64_SYMBOL(vbase, x), #x) = &__ ## x;  #include "vextern.h"  #undef VEXTERN +	vunmap(vbase);  	return 0;   oom: @@ -74,7 +75,7 @@ static int __init init_vdso_vars(void)  	vdso_enabled = 0;  	return -ENOMEM;  } -__initcall(init_vdso_vars); +subsys_initcall(init_vdso_vars);  struct linux_binprm; diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index b83e119fbeb0..68128a1b401a 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -13,6 +13,11 @@ config XEN  	  kernel to boot in a paravirtualized environment under the  	  Xen hypervisor. +config XEN_PVHVM +	def_bool y +	depends on XEN +	depends on X86_LOCAL_APIC +  config XEN_MAX_DOMAIN_MEMORY         int "Maximum allowed size of a domain in gigabytes"         default 8 if X86_32 diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 3bb4fc21f4f2..779385158915 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -12,9 +12,10 @@ CFLAGS_mmu.o			:= $(nostackp)  obj-y		:= enlighten.o setup.o multicalls.o mmu.o irq.o \  			time.o xen-asm.o xen-asm_$(BITS).o \ -			grant-table.o suspend.o +			grant-table.o suspend.o platform-pci-unplug.o  obj-$(CONFIG_SMP)		+= smp.o  obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o  obj-$(CONFIG_XEN_DEBUG_FS)	+= debugfs.o +obj-$(CONFIG_SWIOTLB_XEN)	+= pci-swiotlb-xen.o diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 65d8d79b46a8..7d46c8441418 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -11,6 +11,7 @@   * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007   */ +#include <linux/cpu.h>  #include <linux/kernel.h>  #include <linux/init.h>  #include <linux/smp.h> @@ -35,8 +36,10 @@  #include <xen/interface/version.h>  #include <xen/interface/physdev.h>  #include <xen/interface/vcpu.h> +#include <xen/interface/memory.h>  #include <xen/features.h>  #include <xen/page.h> +#include <xen/hvm.h>  #include <xen/hvc-console.h>  #include <asm/paravirt.h> @@ -55,7 +58,9 @@  #include <asm/pgtable.h>  #include <asm/tlbflush.h>  #include <asm/reboot.h> +#include <asm/setup.h>  #include <asm/stackprotector.h> +#include <asm/hypervisor.h>  #include "xen-ops.h"  #include "mmu.h" @@ -76,6 +81,10 @@ struct shared_info xen_dummy_shared_info;  void *xen_initial_gdt; +RESERVE_BRK(shared_info_page_brk, PAGE_SIZE); +__read_mostly int xen_have_vector_callback; +EXPORT_SYMBOL_GPL(xen_have_vector_callback); +  /*   * Point at some empty memory to start with. We map the real shared_info   * page as soon as fixmap is up and running. 
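The RESERVE_BRK() line added above only enlarges the kernel's boot-time brk segment at link time; nothing is allocated until xen_hvm_init_shared_info() further down carves the page out with extend_brk(). A minimal sketch of the pairing (the consumer function and its name are hypothetical):

	/* grow the brk reservation at link time */
	RESERVE_BRK(example_page_brk, PAGE_SIZE);

	static void *example_page;

	static void __init claim_example_page(void)
	{
		/* extend_brk(size, align) hands back zeroed memory and
		 * BUGs if the link-time reservations are exceeded */
		example_page = extend_brk(PAGE_SIZE, PAGE_SIZE);
	}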
@@ -97,6 +106,14 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;   */  static int have_vcpu_info_placement = 1; +static void clamp_max_cpus(void) +{ +#ifdef CONFIG_SMP +	if (setup_max_cpus > MAX_VIRT_CPUS) +		setup_max_cpus = MAX_VIRT_CPUS; +#endif +} +  static void xen_vcpu_setup(int cpu)  {  	struct vcpu_register_vcpu_info info; @@ -104,13 +121,17 @@ static void xen_vcpu_setup(int cpu)  	struct vcpu_info *vcpup;  	BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); -	per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; -	if (!have_vcpu_info_placement) -		return;		/* already tested, not available */ +	if (cpu < MAX_VIRT_CPUS) +		per_cpu(xen_vcpu,cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; -	vcpup = &per_cpu(xen_vcpu_info, cpu); +	if (!have_vcpu_info_placement) { +		if (cpu >= MAX_VIRT_CPUS) +			clamp_max_cpus(); +		return; +	} +	vcpup = &per_cpu(xen_vcpu_info, cpu);  	info.mfn = arbitrary_virt_to_mfn(vcpup);  	info.offset = offset_in_page(vcpup); @@ -125,6 +146,7 @@ static void xen_vcpu_setup(int cpu)  	if (err) {  		printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err);  		have_vcpu_info_placement = 0; +		clamp_max_cpus();  	} else {  		/* This cpu is using the registered vcpu info, even if  		   later ones fail to. */ @@ -731,7 +753,6 @@ static void set_xen_basic_apic_ops(void)  #endif -  static void xen_clts(void)  {  	struct multicall_space mcs; @@ -926,10 +947,6 @@ static const struct pv_init_ops xen_init_ops __initdata = {  	.patch = xen_patch,  }; -static const struct pv_time_ops xen_time_ops __initdata = { -	.sched_clock = xen_sched_clock, -}; -  static const struct pv_cpu_ops xen_cpu_ops __initdata = {  	.cpuid = xen_cpuid, @@ -1028,6 +1045,23 @@ static void xen_crash_shutdown(struct pt_regs *regs)  	xen_reboot(SHUTDOWN_crash);  } +static int +xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr) +{ +	xen_reboot(SHUTDOWN_crash); +	return NOTIFY_DONE; +} + +static struct notifier_block xen_panic_block = { +	.notifier_call= xen_panic_event, +}; + +int xen_panic_handler_init(void) +{ +	atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block); +	return 0; +} +  static const struct machine_ops __initdata xen_machine_ops = {  	.restart = xen_restart,  	.halt = xen_machine_halt, @@ -1067,7 +1101,6 @@ asmlinkage void __init xen_start_kernel(void)  	/* Install Xen paravirt ops */  	pv_info = xen_info;  	pv_init_ops = xen_init_ops; -	pv_time_ops = xen_time_ops;  	pv_cpu_ops = xen_cpu_ops;  	pv_apic_ops = xen_apic_ops; @@ -1075,13 +1108,7 @@ asmlinkage void __init xen_start_kernel(void)  	x86_init.oem.arch_setup = xen_arch_setup;  	x86_init.oem.banner = xen_banner; -	x86_init.timers.timer_init = xen_time_init; -	x86_init.timers.setup_percpu_clockev = x86_init_noop; -	x86_cpuinit.setup_percpu_clockev = x86_init_noop; - -	x86_platform.calibrate_tsc = xen_tsc_khz; -	x86_platform.get_wallclock = xen_get_wallclock; -	x86_platform.set_wallclock = xen_set_wallclock; +	xen_init_time_ops();  	/*  	 * Set up some pagetable state before starting to set any ptes. @@ -1145,6 +1172,10 @@ asmlinkage void __init xen_start_kernel(void)  	pgd = (pgd_t *)xen_start_info->pt_base; +	if (!xen_initial_domain()) +		__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); + +	__supported_pte_mask |= _PAGE_IOMAP;  	/* Don't do the full vcpu_info placement stuff until we have a  	   possible map and a non-dummy shared_info. 
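The xen_vcpu_setup() rework above enforces one rule: a vcpu is only usable if its vcpu_info is reachable, either through a registered per-cpu copy (placement) or through one of the MAX_VIRT_CPUS slots in the fixed shared_info array; when placement is unavailable, clamp_max_cpus() trims the boot plan to match. Restated as a predicate (a restatement of the invariant, not kernel code):

	static bool vcpu_info_reachable(int cpu, bool have_placement)
	{
		return have_placement || cpu < MAX_VIRT_CPUS;
	}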
*/  	per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; @@ -1206,3 +1237,139 @@ asmlinkage void __init xen_start_kernel(void)  	x86_64_start_reservations((char *)__pa_symbol(&boot_params));  #endif  } + +static uint32_t xen_cpuid_base(void) +{ +	uint32_t base, eax, ebx, ecx, edx; +	char signature[13]; + +	for (base = 0x40000000; base < 0x40010000; base += 0x100) { +		cpuid(base, &eax, &ebx, &ecx, &edx); +		*(uint32_t *)(signature + 0) = ebx; +		*(uint32_t *)(signature + 4) = ecx; +		*(uint32_t *)(signature + 8) = edx; +		signature[12] = 0; + +		if (!strcmp("XenVMMXenVMM", signature) && ((eax - base) >= 2)) +			return base; +	} + +	return 0; +} + +static int init_hvm_pv_info(int *major, int *minor) +{ +	uint32_t eax, ebx, ecx, edx, pages, msr, base; +	u64 pfn; + +	base = xen_cpuid_base(); +	cpuid(base + 1, &eax, &ebx, &ecx, &edx); + +	*major = eax >> 16; +	*minor = eax & 0xffff; +	printk(KERN_INFO "Xen version %d.%d.\n", *major, *minor); + +	cpuid(base + 2, &pages, &msr, &ecx, &edx); + +	pfn = __pa(hypercall_page); +	wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); + +	xen_setup_features(); + +	pv_info = xen_info; +	pv_info.kernel_rpl = 0; + +	xen_domain_type = XEN_HVM_DOMAIN; + +	return 0; +} + +void xen_hvm_init_shared_info(void) +{ +	int cpu; +	struct xen_add_to_physmap xatp; +	static struct shared_info *shared_info_page = 0; + +	if (!shared_info_page) +		shared_info_page = (struct shared_info *) +			extend_brk(PAGE_SIZE, PAGE_SIZE); +	xatp.domid = DOMID_SELF; +	xatp.idx = 0; +	xatp.space = XENMAPSPACE_shared_info; +	xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT; +	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) +		BUG(); + +	HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; + +	/* xen_vcpu is a pointer to the vcpu_info struct in the shared_info +	 * page, we use it in the event channel upcall and in some pvclock +	 * related functions. We don't need the vcpu_info placement +	 * optimizations because we don't use any pv_mmu or pv_irq op on +	 * HVM. +	 * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is +	 * online but xen_hvm_init_shared_info is run at resume time too and +	 * in that case multiple vcpus might be online. 
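xen_cpuid_base() above probes the hypervisor CPUID region in 0x100 strides because the Xen leaves may be relocated within 0x40000000-0x4000ffff. The same detection works from userspace; a sketch using GCC's <cpuid.h> (the raw __cpuid macro is used deliberately, since __get_cpuid() would reject hypervisor leaves against the basic max-leaf check):

	#include <stdio.h>
	#include <string.h>
	#include <cpuid.h>

	int main(void)
	{
		unsigned int base, eax, ebx, ecx, edx;
		char sig[13];

		for (base = 0x40000000; base < 0x40010000; base += 0x100) {
			__cpuid(base, eax, ebx, ecx, edx);
			memcpy(sig + 0, &ebx, 4);
			memcpy(sig + 4, &ecx, 4);
			memcpy(sig + 8, &edx, 4);
			sig[12] = 0;

			if (!strcmp(sig, "XenVMMXenVMM") && eax - base >= 2) {
				printf("Xen CPUID leaves at %#x\n", base);
				return 0;
			}
		}
		puts("no Xen signature found");
		return 1;
	}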
*/ +	for_each_online_cpu(cpu) { +		per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; +	} +} + +#ifdef CONFIG_XEN_PVHVM +static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self, +				    unsigned long action, void *hcpu) +{ +	int cpu = (long)hcpu; +	switch (action) { +	case CPU_UP_PREPARE: +		per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; +		break; +	default: +		break; +	} +	return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata xen_hvm_cpu_notifier = { +	.notifier_call	= xen_hvm_cpu_notify, +}; + +static void __init xen_hvm_guest_init(void) +{ +	int r; +	int major, minor; + +	r = init_hvm_pv_info(&major, &minor); +	if (r < 0) +		return; + +	xen_hvm_init_shared_info(); + +	if (xen_feature(XENFEAT_hvm_callback_vector)) +		xen_have_vector_callback = 1; +	register_cpu_notifier(&xen_hvm_cpu_notifier); +	xen_unplug_emulated_devices(); +	have_vcpu_info_placement = 0; +	x86_init.irqs.intr_init = xen_init_IRQ; +	xen_hvm_init_time_ops(); +	xen_hvm_init_mmu_ops(); +} + +static bool __init xen_hvm_platform(void) +{ +	if (xen_pv_domain()) +		return false; + +	if (!xen_cpuid_base()) +		return false; + +	return true; +} + +const __refconst struct hypervisor_x86 x86_hyper_xen_hvm = { +	.name			= "Xen HVM", +	.detect			= xen_hvm_platform, +	.init_platform		= xen_hvm_guest_init, +}; +EXPORT_SYMBOL(x86_hyper_xen_hvm); +#endif diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 914f04695ce5..42086ac406af 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -42,6 +42,7 @@  #include <linux/highmem.h>  #include <linux/debugfs.h>  #include <linux/bug.h> +#include <linux/vmalloc.h>  #include <linux/module.h>  #include <linux/gfp.h> @@ -51,14 +52,19 @@  #include <asm/mmu_context.h>  #include <asm/setup.h>  #include <asm/paravirt.h> +#include <asm/e820.h>  #include <asm/linkage.h> +#include <asm/page.h>  #include <asm/xen/hypercall.h>  #include <asm/xen/hypervisor.h> +#include <xen/xen.h>  #include <xen/page.h>  #include <xen/interface/xen.h> +#include <xen/interface/hvm/hvm_op.h>  #include <xen/interface/version.h> +#include <xen/interface/memory.h>  #include <xen/hvc-console.h>  #include "multicalls.h" @@ -67,6 +73,13 @@  #define MMU_UPDATE_HISTO	30 +/* + * Protects atomic reservation decrease/increase against concurrent increases. + * Also protects non-atomic updates of current_pages and driver_pages, and + * balloon lists. 
+ */ +DEFINE_SPINLOCK(xen_reservation_lock); +  #ifdef CONFIG_XEN_DEBUG_FS  static struct { @@ -377,6 +390,28 @@ static bool xen_page_pinned(void *ptr)  	return PagePinned(page);  } +static bool xen_iomap_pte(pte_t pte) +{ +	return pte_flags(pte) & _PAGE_IOMAP; +} + +static void xen_set_iomap_pte(pte_t *ptep, pte_t pteval) +{ +	struct multicall_space mcs; +	struct mmu_update *u; + +	mcs = xen_mc_entry(sizeof(*u)); +	u = mcs.args; + +	/* ptep might be kmapped when using 32-bit HIGHPTE */ +	u->ptr = arbitrary_virt_to_machine(ptep).maddr; +	u->val = pte_val_ma(pteval); + +	MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_IO); + +	xen_mc_issue(PARAVIRT_LAZY_MMU); +} +  static void xen_extend_mmu_update(const struct mmu_update *update)  {  	struct multicall_space mcs; @@ -453,6 +488,11 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)  void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,  		    pte_t *ptep, pte_t pteval)  { +	if (xen_iomap_pte(pteval)) { +		xen_set_iomap_pte(ptep, pteval); +		goto out; +	} +  	ADD_STATS(set_pte_at, 1);  //	ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));  	ADD_STATS(set_pte_at_current, mm == current->mm); @@ -523,8 +563,25 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)  	return val;  } +static pteval_t iomap_pte(pteval_t val) +{ +	if (val & _PAGE_PRESENT) { +		unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; +		pteval_t flags = val & PTE_FLAGS_MASK; + +		/* We assume the pte frame number is a MFN, so +		   just use it as-is. */ +		val = ((pteval_t)pfn << PAGE_SHIFT) | flags; +	} + +	return val; +} +  pteval_t xen_pte_val(pte_t pte)  { +	if (xen_initial_domain() && (pte.pte & _PAGE_IOMAP)) +		return pte.pte; +  	return pte_mfn_to_pfn(pte.pte);  }  PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val); @@ -537,7 +594,22 @@ PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val);  pte_t xen_make_pte(pteval_t pte)  { -	pte = pte_pfn_to_mfn(pte); +	phys_addr_t addr = (pte & PTE_PFN_MASK); + +	/* +	 * Unprivileged domains are allowed to do IOMAPpings for +	 * PCI passthrough, but not map ISA space.  The ISA +	 * mappings are just dummy local mappings to keep other +	 * parts of the kernel happy. +	 */ +	if (unlikely(pte & _PAGE_IOMAP) && +	    (xen_initial_domain() || addr >= ISA_END_ADDRESS)) { +		pte = iomap_pte(pte); +	} else { +		pte &= ~_PAGE_IOMAP; +		pte = pte_pfn_to_mfn(pte); +	} +  	return native_make_pte(pte);  }  PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); @@ -593,6 +665,11 @@ void xen_set_pud(pud_t *ptr, pud_t val)  void xen_set_pte(pte_t *ptep, pte_t pte)  { +	if (xen_iomap_pte(pte)) { +		xen_set_iomap_pte(ptep, pte); +		return; +	} +  	ADD_STATS(pte_update, 1);  //	ADD_STATS(pte_update_pinned, xen_page_pinned(ptep));  	ADD_STATS(pte_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU); @@ -609,6 +686,11 @@ void xen_set_pte(pte_t *ptep, pte_t pte)  #ifdef CONFIG_X86_PAE  void xen_set_pte_atomic(pte_t *ptep, pte_t pte)  { +	if (xen_iomap_pte(pte)) { +		xen_set_iomap_pte(ptep, pte); +		return; +	} +  	set_64bit((u64 *)ptep, native_pte_val(pte));  } @@ -935,8 +1017,6 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page,     read-only, and can be pinned. 
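xen_set_iomap_pte() above shows the multicall pattern this file uses throughout: hypercall arguments are queued into a per-cpu buffer between xen_mc_batch() and xen_mc_issue(), then flushed to the hypervisor in a single trap. In isolation, reusing the same helpers (a sketch, not a proposed addition):

	/* batch two va-mapping updates into one hypervisor entry; only
	 * the last update carries the TLB-flush flags */
	static void update_two_ptes(unsigned long va1, pte_t pte1,
				    unsigned long va2, pte_t pte2)
	{
		struct multicall_space mcs;

		xen_mc_batch();

		mcs = __xen_mc_entry(0);
		MULTI_update_va_mapping(mcs.mc, va1, pte1, 0);

		mcs = __xen_mc_entry(0);
		MULTI_update_va_mapping(mcs.mc, va2, pte2,
					UVMF_INVLPG | UVMF_ALL);

		xen_mc_issue(0);
	}

The xen_zap_pfn_range() and xen_remap_exchanged_ptes() helpers further down follow exactly this shape, just over 1 << order pages.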
*/  static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)  { -	vm_unmap_aliases(); -  	xen_mc_batch();  	if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) { @@ -1500,7 +1580,6 @@ static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned l  	if (PagePinned(virt_to_page(mm->pgd))) {  		SetPagePinned(page); -		vm_unmap_aliases();  		if (!PageHighMem(page)) {  			make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));  			if (level == PT_PTE && USE_SPLIT_PTLOCKS) @@ -1811,9 +1890,16 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)  		pte = pfn_pte(phys, prot);  		break; -	default: +	case FIX_PARAVIRT_BOOTMAP: +		/* This is an MFN, but it isn't an IO mapping from the +		   IO domain */  		pte = mfn_pte(phys, prot);  		break; + +	default: +		/* By default, set_fixmap is used for hardware mappings */ +		pte = mfn_pte(phys, __pgprot(pgprot_val(prot) | _PAGE_IOMAP)); +		break;  	}  	__native_set_fixmap(idx, pte); @@ -1939,8 +2025,240 @@ void __init xen_init_mmu_ops(void)  	x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;  	x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;  	pv_mmu_ops = xen_mmu_ops; + +	vmap_lazy_unmap = false; +} + +/* Protected by xen_reservation_lock. */ +#define MAX_CONTIG_ORDER 9 /* 2MB */ +static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER]; + +#define VOID_PTE (mfn_pte(0, __pgprot(0))) +static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order, +				unsigned long *in_frames, +				unsigned long *out_frames) +{ +	int i; +	struct multicall_space mcs; + +	xen_mc_batch(); +	for (i = 0; i < (1UL<<order); i++, vaddr += PAGE_SIZE) { +		mcs = __xen_mc_entry(0); + +		if (in_frames) +			in_frames[i] = virt_to_mfn(vaddr); + +		MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0); +		set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY); + +		if (out_frames) +			out_frames[i] = virt_to_pfn(vaddr); +	} +	xen_mc_issue(0); +} + +/* + * Update the pfn-to-mfn mappings for a virtual address range, either to + * point to an array of mfns, or contiguously from a single starting + * mfn. + */ +static void xen_remap_exchanged_ptes(unsigned long vaddr, int order, +				     unsigned long *mfns, +				     unsigned long first_mfn) +{ +	unsigned i, limit; +	unsigned long mfn; + +	xen_mc_batch(); + +	limit = 1u << order; +	for (i = 0; i < limit; i++, vaddr += PAGE_SIZE) { +		struct multicall_space mcs; +		unsigned flags; + +		mcs = __xen_mc_entry(0); +		if (mfns) +			mfn = mfns[i]; +		else +			mfn = first_mfn + i; + +		if (i < (limit - 1)) +			flags = 0; +		else { +			if (order == 0) +				flags = UVMF_INVLPG | UVMF_ALL; +			else +				flags = UVMF_TLB_FLUSH | UVMF_ALL; +		} + +		MULTI_update_va_mapping(mcs.mc, vaddr, +				mfn_pte(mfn, PAGE_KERNEL), flags); + +		set_phys_to_machine(virt_to_pfn(vaddr), mfn); +	} + +	xen_mc_issue(0); +} + +/* + * Perform the hypercall to exchange a region of our pfns to point to + * memory with the required contiguous alignment.  Takes the pfns as + * input, and populates mfns as output. + * + * Returns a success code indicating whether the hypervisor was able to + * satisfy the request or not. 
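Note the postcondition: the exchange helper reports success only when every extent was swapped, and the BUG_ON pair below rules out partial exchanges. From a caller's perspective, the resulting API (xen_create_contiguous_region(), added next) is consumed roughly like this; the call site is hypothetical:

	/* back an existing allocation with machine-contiguous frames
	 * addressable below 4GB (address_bits = 32) */
	static void *alloc_machine_contiguous(unsigned int order)
	{
		unsigned long vstart = __get_free_pages(GFP_KERNEL, order);

		if (!vstart)
			return NULL;

		if (xen_create_contiguous_region(vstart, order, 32)) {
			/* non-zero means -ENOMEM: frames stay discontiguous */
			free_pages(vstart, order);
			return NULL;
		}
		return (void *)vstart;
	}

Teardown would call xen_destroy_contiguous_region(vstart, order) before handing the pages back.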
+ */ +static int xen_exchange_memory(unsigned long extents_in, unsigned int order_in, +			       unsigned long *pfns_in, +			       unsigned long extents_out, +			       unsigned int order_out, +			       unsigned long *mfns_out, +			       unsigned int address_bits) +{ +	long rc; +	int success; + +	struct xen_memory_exchange exchange = { +		.in = { +			.nr_extents   = extents_in, +			.extent_order = order_in, +			.extent_start = pfns_in, +			.domid        = DOMID_SELF +		}, +		.out = { +			.nr_extents   = extents_out, +			.extent_order = order_out, +			.extent_start = mfns_out, +			.address_bits = address_bits, +			.domid        = DOMID_SELF +		} +	}; + +	BUG_ON(extents_in << order_in != extents_out << order_out); + +	rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange); +	success = (exchange.nr_exchanged == extents_in); + +	BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0))); +	BUG_ON(success && (rc != 0)); + +	return success;  } +int xen_create_contiguous_region(unsigned long vstart, unsigned int order, +				 unsigned int address_bits) +{ +	unsigned long *in_frames = discontig_frames, out_frame; +	unsigned long  flags; +	int            success; + +	/* +	 * Currently an auto-translated guest will not perform I/O, nor will +	 * it require PAE page directories below 4GB. Therefore any calls to +	 * this function are redundant and can be ignored. +	 */ + +	if (xen_feature(XENFEAT_auto_translated_physmap)) +		return 0; + +	if (unlikely(order > MAX_CONTIG_ORDER)) +		return -ENOMEM; + +	memset((void *) vstart, 0, PAGE_SIZE << order); + +	spin_lock_irqsave(&xen_reservation_lock, flags); + +	/* 1. Zap current PTEs, remembering MFNs. */ +	xen_zap_pfn_range(vstart, order, in_frames, NULL); + +	/* 2. Get a new contiguous memory extent. */ +	out_frame = virt_to_pfn(vstart); +	success = xen_exchange_memory(1UL << order, 0, in_frames, +				      1, order, &out_frame, +				      address_bits); + +	/* 3. Map the new extent in place of old pages. */ +	if (success) +		xen_remap_exchanged_ptes(vstart, order, NULL, out_frame); +	else +		xen_remap_exchanged_ptes(vstart, order, in_frames, 0); + +	spin_unlock_irqrestore(&xen_reservation_lock, flags); + +	return success ? 0 : -ENOMEM; +} +EXPORT_SYMBOL_GPL(xen_create_contiguous_region); + +void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order) +{ +	unsigned long *out_frames = discontig_frames, in_frame; +	unsigned long  flags; +	int success; + +	if (xen_feature(XENFEAT_auto_translated_physmap)) +		return; + +	if (unlikely(order > MAX_CONTIG_ORDER)) +		return; + +	memset((void *) vstart, 0, PAGE_SIZE << order); + +	spin_lock_irqsave(&xen_reservation_lock, flags); + +	/* 1. Find start MFN of contiguous extent. */ +	in_frame = virt_to_mfn(vstart); + +	/* 2. Zap current PTEs. */ +	xen_zap_pfn_range(vstart, order, NULL, out_frames); + +	/* 3. Do the exchange for non-contiguous MFNs. */ +	success = xen_exchange_memory(1, order, &in_frame, 1UL << order, +					0, out_frames, 0); + +	/* 4. Map new pages in place of old pages. 
*/ +	if (success) +		xen_remap_exchanged_ptes(vstart, order, out_frames, 0); +	else +		xen_remap_exchanged_ptes(vstart, order, NULL, in_frame); + +	spin_unlock_irqrestore(&xen_reservation_lock, flags); +} +EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region); + +#ifdef CONFIG_XEN_PVHVM +static void xen_hvm_exit_mmap(struct mm_struct *mm) +{ +	struct xen_hvm_pagetable_dying a; +	int rc; + +	a.domid = DOMID_SELF; +	a.gpa = __pa(mm->pgd); +	rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a); +	WARN_ON_ONCE(rc < 0); +} + +static int is_pagetable_dying_supported(void) +{ +	struct xen_hvm_pagetable_dying a; +	int rc = 0; + +	a.domid = DOMID_SELF; +	a.gpa = 0x00; +	rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a); +	if (rc < 0) { +		printk(KERN_DEBUG "HVMOP_pagetable_dying not supported\n"); +		return 0; +	} +	return 1; +} + +void __init xen_hvm_init_mmu_ops(void) +{ +	if (is_pagetable_dying_supported()) +		pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap; +} +#endif +  #ifdef CONFIG_XEN_DEBUG_FS  static struct dentry *d_mmu_debug; diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index 5fe6bc7f5ecf..fa938c4aa2f7 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -60,4 +60,5 @@ void  xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,  unsigned long xen_read_cr2_direct(void);  extern void xen_init_mmu_ops(void); +extern void xen_hvm_init_mmu_ops(void);  #endif	/* _XEN_MMU_H */ diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c new file mode 100644 index 000000000000..a013ec9d0c54 --- /dev/null +++ b/arch/x86/xen/pci-swiotlb-xen.c @@ -0,0 +1,58 @@ +/* Glue code to lib/swiotlb-xen.c */ + +#include <linux/dma-mapping.h> +#include <xen/swiotlb-xen.h> + +#include <asm/xen/hypervisor.h> +#include <xen/xen.h> + +int xen_swiotlb __read_mostly; + +static struct dma_map_ops xen_swiotlb_dma_ops = { +	.mapping_error = xen_swiotlb_dma_mapping_error, +	.alloc_coherent = xen_swiotlb_alloc_coherent, +	.free_coherent = xen_swiotlb_free_coherent, +	.sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu, +	.sync_single_for_device = xen_swiotlb_sync_single_for_device, +	.sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu, +	.sync_sg_for_device = xen_swiotlb_sync_sg_for_device, +	.map_sg = xen_swiotlb_map_sg_attrs, +	.unmap_sg = xen_swiotlb_unmap_sg_attrs, +	.map_page = xen_swiotlb_map_page, +	.unmap_page = xen_swiotlb_unmap_page, +	.dma_supported = xen_swiotlb_dma_supported, +}; + +/* + * pci_xen_swiotlb_detect - set xen_swiotlb to 1 if necessary + * + * This returns non-zero if we are forced to use xen_swiotlb (by the boot + * option). + */ +int __init pci_xen_swiotlb_detect(void) +{ + +	/* If running as PV guest, either iommu=soft, or swiotlb=force will +	 * activate this IOMMU. If running as PV privileged, activate it +	 * regardless. +	 */ +	if ((xen_initial_domain() || swiotlb || swiotlb_force) && +	    (xen_pv_domain())) +		xen_swiotlb = 1; + +	/* If we are running under Xen, we MUST disable the native SWIOTLB. +	 * Don't worry about swiotlb_force flag activating the native, as +	 * the 'swiotlb' flag is the only one turning it on.
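Once pci_xen_swiotlb_init() below installs xen_swiotlb_dma_ops as the global dma_ops, every streaming-DMA call a driver makes is dispatched through the xen_swiotlb_* entry points, which bounce any buffer that is not machine-contiguous or DMA-addressable. Roughly (a simplification of this era's asm/dma-mapping.h dispatch, not new code):

	static inline dma_addr_t sketch_dma_map_page(struct device *dev,
			struct page *page, unsigned long offset, size_t size,
			enum dma_data_direction dir)
	{
		struct dma_map_ops *ops = get_dma_ops(dev); /* -> xen_swiotlb_dma_ops */

		return ops->map_page(dev, page, offset, size, dir, NULL);
	}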
*/ +	if (xen_pv_domain()) +		swiotlb = 0; + +	return xen_swiotlb; +} + +void __init pci_xen_swiotlb_init(void) +{ +	if (xen_swiotlb) { +		xen_swiotlb_init(1); +		dma_ops = &xen_swiotlb_dma_ops; +	} +} diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c new file mode 100644 index 000000000000..554c002a1e1a --- /dev/null +++ b/arch/x86/xen/platform-pci-unplug.c @@ -0,0 +1,137 @@ +/****************************************************************************** + * platform-pci-unplug.c + * + * Xen platform PCI device driver + * Copyright (c) 2010, Citrix + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#include <linux/init.h> +#include <linux/io.h> +#include <linux/module.h> + +#include <xen/platform_pci.h> + +#define XEN_PLATFORM_ERR_MAGIC -1 +#define XEN_PLATFORM_ERR_PROTOCOL -2 +#define XEN_PLATFORM_ERR_BLACKLIST -3 + +/* store the value of xen_emul_unplug after the unplug is done */ +int xen_platform_pci_unplug; +EXPORT_SYMBOL_GPL(xen_platform_pci_unplug); +#ifdef CONFIG_XEN_PVHVM +static int xen_emul_unplug; + +static int __init check_platform_magic(void) +{ +	short magic; +	char protocol; + +	magic = inw(XEN_IOPORT_MAGIC); +	if (magic != XEN_IOPORT_MAGIC_VAL) { +		printk(KERN_ERR "Xen Platform PCI: unrecognised magic value\n"); +		return XEN_PLATFORM_ERR_MAGIC; +	} + +	protocol = inb(XEN_IOPORT_PROTOVER); + +	printk(KERN_DEBUG "Xen Platform PCI: I/O protocol version %d\n", +			protocol); + +	switch (protocol) { +	case 1: +		outw(XEN_IOPORT_LINUX_PRODNUM, XEN_IOPORT_PRODNUM); +		outl(XEN_IOPORT_LINUX_DRVVER, XEN_IOPORT_DRVVER); +		if (inw(XEN_IOPORT_MAGIC) != XEN_IOPORT_MAGIC_VAL) { +			printk(KERN_ERR "Xen Platform: blacklisted by host\n"); +			return XEN_PLATFORM_ERR_BLACKLIST; +		} +		break; +	default: +		printk(KERN_WARNING "Xen Platform PCI: unknown I/O protocol version"); +		return XEN_PLATFORM_ERR_PROTOCOL; +	} + +	return 0; +} + +void __init xen_unplug_emulated_devices(void) +{ +	int r; + +	/* check the version of the xen platform PCI device */ +	r = check_platform_magic(); +	/* If the version matches enable the Xen platform PCI driver. +	 * Also enable the Xen platform PCI driver if the version is really old +	 * and the user told us to ignore it. */ +	if (r && !(r == XEN_PLATFORM_ERR_MAGIC && +			(xen_emul_unplug & XEN_UNPLUG_IGNORE))) +		return; +	/* Set the default value of xen_emul_unplug depending on whether or +	 * not the Xen PV frontends and the Xen platform PCI driver have +	 * been compiled for this kernel (modules or built-in are both OK). 
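For reference, the early parameter accepted by the parser below takes a comma-separated list, so booting a PV-on-HVM guest with

	xen_emul_unplug=ide-disks,nics

unplugs the emulated IDE disks and NICs in favour of the paravirtual frontends, while xen_emul_unplug=ignore skips the unplug write to XEN_IOPORT_UNPLUG altogether.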
*/ +	if (!xen_emul_unplug) { +		if (xen_must_unplug_nics()) { +			printk(KERN_INFO "Netfront and the Xen platform PCI driver have " +					"been compiled for this kernel: unplug emulated NICs.\n"); +			xen_emul_unplug |= XEN_UNPLUG_ALL_NICS; +		} +		if (xen_must_unplug_disks()) { +			printk(KERN_INFO "Blkfront and the Xen platform PCI driver have " +					"been compiled for this kernel: unplug emulated disks.\n" +					"You might have to change the root device\n" +					"from /dev/hd[a-d] to /dev/xvd[a-d]\n" +					"in your root= kernel command line option\n"); +			xen_emul_unplug |= XEN_UNPLUG_ALL_IDE_DISKS; +		} +	} +	/* Now unplug the emulated devices */ +	if (!(xen_emul_unplug & XEN_UNPLUG_IGNORE)) +		outw(xen_emul_unplug, XEN_IOPORT_UNPLUG); +	xen_platform_pci_unplug = xen_emul_unplug; +} + +static int __init parse_xen_emul_unplug(char *arg) +{ +	char *p, *q; +	int l; + +	for (p = arg; p; p = q) { +		q = strchr(p, ','); +		if (q) { +			l = q - p; +			q++; +		} else { +			l = strlen(p); +		} +		if (!strncmp(p, "all", l)) +			xen_emul_unplug |= XEN_UNPLUG_ALL; +		else if (!strncmp(p, "ide-disks", l)) +			xen_emul_unplug |= XEN_UNPLUG_ALL_IDE_DISKS; +		else if (!strncmp(p, "aux-ide-disks", l)) +			xen_emul_unplug |= XEN_UNPLUG_AUX_IDE_DISKS; +		else if (!strncmp(p, "nics", l)) +			xen_emul_unplug |= XEN_UNPLUG_ALL_NICS; +		else if (!strncmp(p, "ignore", l)) +			xen_emul_unplug |= XEN_UNPLUG_IGNORE; +		else +			printk(KERN_WARNING "unrecognised option '%s' " +				 "in parameter 'xen_emul_unplug'\n", p); +	} +	return 0; +} +early_param("xen_emul_unplug", parse_xen_emul_unplug); +#endif diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index ad0047f47cd4..328b00305426 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -20,6 +20,7 @@  #include <xen/page.h>  #include <xen/interface/callback.h>  #include <xen/interface/physdev.h> +#include <xen/interface/memory.h>  #include <xen/features.h>  #include "xen-ops.h" @@ -32,6 +33,73 @@ extern void xen_sysenter_target(void);  extern void xen_syscall_target(void);  extern void xen_syscall32_target(void); +static unsigned long __init xen_release_chunk(phys_addr_t start_addr, +					      phys_addr_t end_addr) +{ +	struct xen_memory_reservation reservation = { +		.address_bits = 0, +		.extent_order = 0, +		.domid        = DOMID_SELF +	}; +	unsigned long start, end; +	unsigned long len = 0; +	unsigned long pfn; +	int ret; + +	start = PFN_UP(start_addr); +	end = PFN_DOWN(end_addr); + +	if (end <= start) +		return 0; + +	printk(KERN_INFO "xen_release_chunk: looking at area pfn %lx-%lx: ", +	       start, end); +	for(pfn = start; pfn < end; pfn++) { +		unsigned long mfn = pfn_to_mfn(pfn); + +		/* Make sure pfn exists to start with */ +		if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn) +			continue; + +		set_xen_guest_handle(reservation.extent_start, &mfn); +		reservation.nr_extents = 1; + +		ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, +					   &reservation); +		WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n", +		     start, end, ret); +		if (ret == 1) { +			set_phys_to_machine(pfn, INVALID_P2M_ENTRY); +			len++; +		} +	} +	printk(KERN_CONT "%ld pages freed\n", len); + +	return len; +} + +static unsigned long __init xen_return_unused_memory(unsigned long max_pfn, +						     const struct e820map *e820) +{ +	phys_addr_t max_addr = PFN_PHYS(max_pfn); +	phys_addr_t last_end = 0; +	unsigned long released = 0; +	int i; + +	for (i = 0; i < e820->nr_map && last_end < max_addr; i++) { +		phys_addr_t end = 
e820->map[i].addr; +		end = min(max_addr, end); + +		released += xen_release_chunk(last_end, end); +		last_end = e820->map[i].addr + e820->map[i].size; +	} + +	if (last_end < max_addr) +		released += xen_release_chunk(last_end, max_addr); + +	printk(KERN_INFO "released %ld pages of unused memory\n", released); +	return released; +}  /**   * machine_specific_memory_setup - Hook for machine specific memory setup. @@ -67,6 +135,8 @@ char * __init xen_memory_setup(void)  	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); +	xen_return_unused_memory(xen_start_info->nr_pages, &e820); +  	return "Xen";  } @@ -156,6 +226,8 @@ void __init xen_arch_setup(void)  	struct physdev_set_iopl set_iopl;  	int rc; +	xen_panic_handler_init(); +  	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);  	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index a29693fd3138..25f232b18a82 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -394,6 +394,8 @@ static void stop_self(void *v)  	load_cr3(swapper_pg_dir);  	/* should set up a minimal gdt */ +	set_cpu_online(cpu, false); +  	HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL);  	BUG();  } diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index a9c661108034..1d789d56877c 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -26,6 +26,18 @@ void xen_pre_suspend(void)  		BUG();  } +void xen_hvm_post_suspend(int suspend_cancelled) +{ +	int cpu; +	xen_hvm_init_shared_info(); +	xen_callback_vector(); +	if (xen_feature(XENFEAT_hvm_safe_pvclock)) { +		for_each_online_cpu(cpu) { +			xen_setup_runstate_info(cpu); +		} +	} +} +  void xen_post_suspend(int suspend_cancelled)  {  	xen_build_mfn_list_list(); diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index b3c6c59ed302..1a5353a753fc 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -20,6 +20,7 @@  #include <asm/xen/hypercall.h>  #include <xen/events.h> +#include <xen/features.h>  #include <xen/interface/xen.h>  #include <xen/interface/vcpu.h> @@ -155,47 +156,8 @@ static void do_stolen_accounting(void)  	account_idle_ticks(ticks);  } -/* - * Xen sched_clock implementation.  Returns the number of unstolen - * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED - * states. - */ -unsigned long long xen_sched_clock(void) -{ -	struct vcpu_runstate_info state; -	cycle_t now; -	u64 ret; -	s64 offset; - -	/* -	 * Ideally sched_clock should be called on a per-cpu basis -	 * anyway, so preempt should already be disabled, but that's -	 * not current practice at the moment. 
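The xen_sched_clock() implementation being deleted here subtracted stolen time; its replacement (the xen_time_ops table below sets .sched_clock to xen_clocksource_read) trades that accounting for a cheaper, simpler read. That read ultimately reduces to scaling the TSC delta by the hypervisor-maintained pvclock fields; a simplified sketch, assuming the standard pvclock_vcpu_time_info layout and omitting the version-counter retry loop that guards against concurrent hypervisor updates:

	/* the real scale_delta() does a 64x32->96 bit multiply; the plain
	 * 64-bit multiply here truncates for very large deltas */
	static u64 pvclock_sample(const struct pvclock_vcpu_time_info *t)
	{
		u64 delta = native_read_tsc() - t->tsc_timestamp;

		if (t->tsc_shift >= 0)
			delta <<= t->tsc_shift;
		else
			delta >>= -t->tsc_shift;

		return t->system_time + ((delta * t->tsc_to_system_mul) >> 32);
	}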
-	 */ -	preempt_disable(); - -	now = xen_clocksource_read(); - -	get_runstate_snapshot(&state); - -	WARN_ON(state.state != RUNSTATE_running); - -	offset = now - state.state_entry_time; -	if (offset < 0) -		offset = 0; - -	ret = state.time[RUNSTATE_blocked] + -		state.time[RUNSTATE_running] + -		offset; - -	preempt_enable(); - -	return ret; -} - -  /* Get the TSC speed from Xen */ -unsigned long xen_tsc_khz(void) +static unsigned long xen_tsc_khz(void)  {  	struct pvclock_vcpu_time_info *info =  		&HYPERVISOR_shared_info->vcpu_info[0].time; @@ -230,7 +192,7 @@ static void xen_read_wallclock(struct timespec *ts)  	put_cpu_var(xen_vcpu);  } -unsigned long xen_get_wallclock(void) +static unsigned long xen_get_wallclock(void)  {  	struct timespec ts; @@ -238,7 +200,7 @@ unsigned long xen_get_wallclock(void)  	return ts.tv_sec;  } -int xen_set_wallclock(unsigned long now) +static int xen_set_wallclock(unsigned long now)  {  	/* do nothing for domU */  	return -1; @@ -473,7 +435,11 @@ void xen_timer_resume(void)  	}  } -__init void xen_time_init(void) +static const struct pv_time_ops xen_time_ops __initdata = { +	.sched_clock = xen_clocksource_read, +}; + +static __init void xen_time_init(void)  {  	int cpu = smp_processor_id();  	struct timespec tp; @@ -497,3 +463,47 @@ __init void xen_time_init(void)  	xen_setup_timer(cpu);  	xen_setup_cpu_clockevents();  } + +__init void xen_init_time_ops(void) +{ +	pv_time_ops = xen_time_ops; + +	x86_init.timers.timer_init = xen_time_init; +	x86_init.timers.setup_percpu_clockev = x86_init_noop; +	x86_cpuinit.setup_percpu_clockev = x86_init_noop; + +	x86_platform.calibrate_tsc = xen_tsc_khz; +	x86_platform.get_wallclock = xen_get_wallclock; +	x86_platform.set_wallclock = xen_set_wallclock; +} + +#ifdef CONFIG_XEN_PVHVM +static void xen_hvm_setup_cpu_clockevents(void) +{ +	int cpu = smp_processor_id(); +	xen_setup_runstate_info(cpu); +	xen_setup_timer(cpu); +	xen_setup_cpu_clockevents(); +} + +__init void xen_hvm_init_time_ops(void) +{ +	/* vector callback is needed otherwise we cannot receive interrupts +	 * on cpu > 0 */ +	if (!xen_have_vector_callback && num_present_cpus() > 1) +		return; +	if (!xen_feature(XENFEAT_hvm_safe_pvclock)) { +		printk(KERN_INFO "Xen doesn't support pvclock on HVM," +				"disable pv timer\n"); +		return; +	} + +	pv_time_ops = xen_time_ops; +	x86_init.timers.setup_percpu_clockev = xen_time_init; +	x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents; + +	x86_platform.calibrate_tsc = xen_tsc_khz; +	x86_platform.get_wallclock = xen_get_wallclock; +	x86_platform.set_wallclock = xen_set_wallclock; +} +#endif diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index f9153a300bce..7c8ab86163e9 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -38,6 +38,10 @@ void xen_enable_sysenter(void);  void xen_enable_syscall(void);  void xen_vcpu_restore(void); +void xen_callback_vector(void); +void xen_hvm_init_shared_info(void); +void __init xen_unplug_emulated_devices(void); +  void __init xen_build_dynamic_phys_to_machine(void);  void xen_init_irq_ops(void); @@ -46,11 +50,8 @@ void xen_setup_runstate_info(int cpu);  void xen_teardown_timer(int cpu);  cycle_t xen_clocksource_read(void);  void xen_setup_cpu_clockevents(void); -unsigned long xen_tsc_khz(void); -void __init xen_time_init(void); -unsigned long xen_get_wallclock(void); -int xen_set_wallclock(unsigned long time); -unsigned long long xen_sched_clock(void); +void __init xen_init_time_ops(void); +void __init xen_hvm_init_time_ops(void);  irqreturn_t 
xen_debug_interrupt(int irq, void *dev_id); @@ -101,4 +102,6 @@ void xen_sysret32(void);  void xen_sysret64(void);  void xen_adjust_exception_frame(void); +extern int xen_panic_handler_init(void); +  #endif /* XEN_OPS_H */