diff options
Diffstat (limited to 'arch/x86/include')
40 files changed, 495 insertions, 356 deletions
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index a30316bf801a..c80f6b6f3da2 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -23,6 +23,11 @@ #define APIC_VERBOSE 1 #define APIC_DEBUG 2 +/* Macros for apic_extnmi which controls external NMI masking */ +#define APIC_EXTNMI_BSP 0 /* Default */ +#define APIC_EXTNMI_ALL 1 +#define APIC_EXTNMI_NONE 2 + /* * Define the default level of output to be very little * This can be turned up by using apic=verbose for more @@ -303,6 +308,7 @@ struct apic { unsigned int *apicid); /* ipi */ + void (*send_IPI)(int cpu, int vector); void (*send_IPI_mask)(const struct cpumask *mask, int vector); void (*send_IPI_mask_allbutself)(const struct cpumask *mask, int vector); diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index ae5fb83e6d91..3e8674288198 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -3,7 +3,6 @@ #include <linux/compiler.h> #include <linux/types.h> -#include <asm/processor.h> #include <asm/alternative.h> #include <asm/cmpxchg.h> #include <asm/rmwcc.h> diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index a11c30b77fb5..a984111135b1 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -3,7 +3,6 @@ #include <linux/compiler.h> #include <linux/types.h> -#include <asm/processor.h> //#include <asm/cmpxchg.h> /* An 64bit atomic type */ diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 4fa687a47a62..6b8d6e8cd449 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -27,7 +27,7 @@ #define BOOT_HEAP_SIZE 0x400000 #else /* !CONFIG_KERNEL_BZIP2 */ -#define BOOT_HEAP_SIZE 0x8000 +#define BOOT_HEAP_SIZE 0x10000 #endif /* !CONFIG_KERNEL_BZIP2 */ diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h index 0d467b338835..a8303ebe089f 100644 --- a/arch/x86/include/asm/calgary.h +++ b/arch/x86/include/asm/calgary.h @@ -31,7 +31,7 @@ #include <asm/types.h> struct iommu_table { - struct cal_chipset_ops *chip_ops; /* chipset specific funcs */ + const struct cal_chipset_ops *chip_ops; /* chipset specific funcs */ unsigned long it_base; /* mapped address of tce table */ unsigned long it_hint; /* Hint for next alloc */ unsigned long *it_map; /* A simple allocation bitmap for now */ diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index f7e142926481..e4959d023af8 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -109,6 +109,6 @@ static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new) #endif -#define system_has_cmpxchg_double() cpu_has_cx8 +#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX8) #endif /* _ASM_X86_CMPXCHG_32_H */ diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 1af94697aae5..caa23a34c963 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -18,6 +18,6 @@ static inline void set_64bit(volatile u64 *ptr, u64 val) cmpxchg_local((ptr), (o), (n)); \ }) -#define system_has_cmpxchg_double() cpu_has_cx16 +#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX16) #endif /* _ASM_X86_CMPXCHG_64_H */ diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index bf2caa1dedc5..678637ad7476 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -36,4 +36,7 @@ extern int _debug_hotplug_cpu(int cpu, int action); int mwait_usable(const struct cpuinfo_x86 *); +unsigned int x86_family(unsigned int sig); +unsigned int x86_model(unsigned int sig); +unsigned int x86_stepping(unsigned int sig); #endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index e4f8010f22e0..7ad8c9464297 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -12,7 +12,7 @@ #include <asm/disabled-features.h> #endif -#define NCAPINTS 14 /* N 32-bit words worth of info */ +#define NCAPINTS 16 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ /* @@ -181,22 +181,17 @@ /* * Auxiliary flags: Linux defined - For features scattered in various - * CPUID levels like 0x6, 0xA etc, word 7 + * CPUID levels like 0x6, 0xA etc, word 7. + * + * Reuse free bits when adding new feature flags! */ -#define X86_FEATURE_IDA ( 7*32+ 0) /* Intel Dynamic Acceleration */ -#define X86_FEATURE_ARAT ( 7*32+ 1) /* Always Running APIC Timer */ + #define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ #define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ -#define X86_FEATURE_PLN ( 7*32+ 5) /* Intel Power Limit Notification */ -#define X86_FEATURE_PTS ( 7*32+ 6) /* Intel Package Thermal Status */ -#define X86_FEATURE_DTHERM ( 7*32+ 7) /* Digital Thermal Sensor */ + #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -#define X86_FEATURE_HWP ( 7*32+ 10) /* "hwp" Intel HWP */ -#define X86_FEATURE_HWP_NOTIFY ( 7*32+ 11) /* Intel HWP_NOTIFY */ -#define X86_FEATURE_HWP_ACT_WINDOW ( 7*32+ 12) /* Intel HWP_ACT_WINDOW */ -#define X86_FEATURE_HWP_EPP ( 7*32+13) /* Intel HWP_EPP */ -#define X86_FEATURE_HWP_PKG_REQ ( 7*32+14) /* Intel HWP_PKG_REQ */ + #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ /* Virtualization flags: Linux defined, word 8 */ @@ -205,17 +200,9 @@ #define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */ #define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */ #define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */ -#define X86_FEATURE_NPT ( 8*32+ 5) /* AMD Nested Page Table support */ -#define X86_FEATURE_LBRV ( 8*32+ 6) /* AMD LBR Virtualization support */ -#define X86_FEATURE_SVML ( 8*32+ 7) /* "svm_lock" AMD SVM locking MSR */ -#define X86_FEATURE_NRIPS ( 8*32+ 8) /* "nrip_save" AMD SVM next_rip save */ -#define X86_FEATURE_TSCRATEMSR ( 8*32+ 9) /* "tsc_scale" AMD TSC scaling support */ -#define X86_FEATURE_VMCBCLEAN ( 8*32+10) /* "vmcb_clean" AMD VMCB clean bits support */ -#define X86_FEATURE_FLUSHBYASID ( 8*32+11) /* AMD flush-by-ASID support */ -#define X86_FEATURE_DECODEASSISTS ( 8*32+12) /* AMD Decode Assists support */ -#define X86_FEATURE_PAUSEFILTER ( 8*32+13) /* AMD filtered pause intercept */ -#define X86_FEATURE_PFTHRESHOLD ( 8*32+14) /* AMD pause filter threshold */ + #define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */ +#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ @@ -258,6 +245,30 @@ /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ +/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ +#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ +#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ +#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ +#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ +#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ +#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ +#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ +#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ +#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ +#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ + +/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */ +#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ +#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ +#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ +#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ +#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ +#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ +#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ +#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ +#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ +#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ + /* * BUG word(s) */ @@ -278,6 +289,26 @@ #include <asm/asm.h> #include <linux/bitops.h> +enum cpuid_leafs +{ + CPUID_1_EDX = 0, + CPUID_8000_0001_EDX, + CPUID_8086_0001_EDX, + CPUID_LNX_1, + CPUID_1_ECX, + CPUID_C000_0001_EDX, + CPUID_8000_0001_ECX, + CPUID_LNX_2, + CPUID_LNX_3, + CPUID_7_0_EBX, + CPUID_D_1_EAX, + CPUID_F_0_EDX, + CPUID_F_1_EDX, + CPUID_8000_0008_EBX, + CPUID_6_EAX, + CPUID_8000_000A_EDX, +}; + #ifdef CONFIG_X86_FEATURE_NAMES extern const char * const x86_cap_flags[NCAPINTS*32]; extern const char * const x86_power_flags[32]; @@ -355,60 +386,31 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; } while (0) #define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) -#define cpu_has_de boot_cpu_has(X86_FEATURE_DE) #define cpu_has_pse boot_cpu_has(X86_FEATURE_PSE) #define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC) #define cpu_has_pge boot_cpu_has(X86_FEATURE_PGE) #define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC) -#define cpu_has_sep boot_cpu_has(X86_FEATURE_SEP) -#define cpu_has_mtrr boot_cpu_has(X86_FEATURE_MTRR) -#define cpu_has_mmx boot_cpu_has(X86_FEATURE_MMX) #define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR) #define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM) #define cpu_has_xmm2 boot_cpu_has(X86_FEATURE_XMM2) -#define cpu_has_xmm3 boot_cpu_has(X86_FEATURE_XMM3) -#define cpu_has_ssse3 boot_cpu_has(X86_FEATURE_SSSE3) #define cpu_has_aes boot_cpu_has(X86_FEATURE_AES) #define cpu_has_avx boot_cpu_has(X86_FEATURE_AVX) #define cpu_has_avx2 boot_cpu_has(X86_FEATURE_AVX2) -#define cpu_has_ht boot_cpu_has(X86_FEATURE_HT) -#define cpu_has_nx boot_cpu_has(X86_FEATURE_NX) -#define cpu_has_xstore boot_cpu_has(X86_FEATURE_XSTORE) -#define cpu_has_xstore_enabled boot_cpu_has(X86_FEATURE_XSTORE_EN) -#define cpu_has_xcrypt boot_cpu_has(X86_FEATURE_XCRYPT) -#define cpu_has_xcrypt_enabled boot_cpu_has(X86_FEATURE_XCRYPT_EN) -#define cpu_has_ace2 boot_cpu_has(X86_FEATURE_ACE2) -#define cpu_has_ace2_enabled boot_cpu_has(X86_FEATURE_ACE2_EN) -#define cpu_has_phe boot_cpu_has(X86_FEATURE_PHE) -#define cpu_has_phe_enabled boot_cpu_has(X86_FEATURE_PHE_EN) -#define cpu_has_pmm boot_cpu_has(X86_FEATURE_PMM) -#define cpu_has_pmm_enabled boot_cpu_has(X86_FEATURE_PMM_EN) -#define cpu_has_ds boot_cpu_has(X86_FEATURE_DS) -#define cpu_has_pebs boot_cpu_has(X86_FEATURE_PEBS) #define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLUSH) -#define cpu_has_bts boot_cpu_has(X86_FEATURE_BTS) #define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES) #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) #define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT) -#define cpu_has_xmm4_1 boot_cpu_has(X86_FEATURE_XMM4_1) -#define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2) #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) -#define cpu_has_xsaveopt boot_cpu_has(X86_FEATURE_XSAVEOPT) #define cpu_has_xsaves boot_cpu_has(X86_FEATURE_XSAVES) #define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE) #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) -#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) -#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE) -#define cpu_has_perfctr_nb boot_cpu_has(X86_FEATURE_PERFCTR_NB) -#define cpu_has_perfctr_l2 boot_cpu_has(X86_FEATURE_PERFCTR_L2) -#define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8) -#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16) -#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU) -#define cpu_has_topoext boot_cpu_has(X86_FEATURE_TOPOEXT) -#define cpu_has_bpext boot_cpu_has(X86_FEATURE_BPEXT) - -#if __GNUC__ >= 4 +/* + * Do not add any more of those clumsy macros - use static_cpu_has_safe() for + * fast paths and boot_cpu_has() otherwise! + */ + +#if __GNUC__ >= 4 && defined(CONFIG_X86_FAST_FEATURE_TESTS) extern void warn_pre_alternatives(void); extern bool __static_cpu_has_safe(u16 bit); diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index f80d70009ff8..6d7d0e52ed5a 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -19,7 +19,6 @@ #include <asm/acpi.h> #include <asm/apicdef.h> #include <asm/page.h> -#include <asm/pvclock.h> #ifdef CONFIG_X86_32 #include <linux/threads.h> #include <asm/kmap_types.h> @@ -72,10 +71,6 @@ enum fixed_addresses { #ifdef CONFIG_X86_VSYSCALL_EMULATION VSYSCALL_PAGE = (FIXADDR_TOP - VSYSCALL_ADDR) >> PAGE_SHIFT, #endif -#ifdef CONFIG_PARAVIRT_CLOCK - PVCLOCK_FIXMAP_BEGIN, - PVCLOCK_FIXMAP_END = PVCLOCK_FIXMAP_BEGIN+PVCLOCK_VSYSCALL_NR_PAGES-1, -#endif #endif FIX_DBGP_BASE, FIX_EARLYCON_MEM_BASE, diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 3c3550c3a4a3..0fd440df63f1 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -42,6 +42,7 @@ extern void fpu__init_cpu_xstate(void); extern void fpu__init_system(struct cpuinfo_x86 *c); extern void fpu__init_check_bugs(void); extern void fpu__resume_cpu(void); +extern u64 fpu__get_supported_xfeatures_mask(void); /* * Debugging facility: @@ -224,18 +225,67 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) #define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f" #define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" -/* xstate instruction fault handler: */ -#define xstate_fault(__err) \ - \ - ".section .fixup,\"ax\"\n" \ - \ - "3: movl $-2,%[_err]\n" \ - " jmp 2b\n" \ - \ - ".previous\n" \ - \ - _ASM_EXTABLE(1b, 3b) \ - : [_err] "=r" (__err) +#define XSTATE_OP(op, st, lmask, hmask, err) \ + asm volatile("1:" op "\n\t" \ + "xor %[err], %[err]\n" \ + "2:\n\t" \ + ".pushsection .fixup,\"ax\"\n\t" \ + "3: movl $-2,%[err]\n\t" \ + "jmp 2b\n\t" \ + ".popsection\n\t" \ + _ASM_EXTABLE(1b, 3b) \ + : [err] "=r" (err) \ + : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ + : "memory") + +/* + * If XSAVES is enabled, it replaces XSAVEOPT because it supports a compact + * format and supervisor states in addition to modified optimization in + * XSAVEOPT. + * + * Otherwise, if XSAVEOPT is enabled, XSAVEOPT replaces XSAVE because XSAVEOPT + * supports modified optimization which is not supported by XSAVE. + * + * We use XSAVE as a fallback. + * + * The 661 label is defined in the ALTERNATIVE* macros as the address of the + * original instruction which gets replaced. We need to use it here as the + * address of the instruction where we might get an exception at. + */ +#define XSTATE_XSAVE(st, lmask, hmask, err) \ + asm volatile(ALTERNATIVE_2(XSAVE, \ + XSAVEOPT, X86_FEATURE_XSAVEOPT, \ + XSAVES, X86_FEATURE_XSAVES) \ + "\n" \ + "xor %[err], %[err]\n" \ + "3:\n" \ + ".pushsection .fixup,\"ax\"\n" \ + "4: movl $-2, %[err]\n" \ + "jmp 3b\n" \ + ".popsection\n" \ + _ASM_EXTABLE(661b, 4b) \ + : [err] "=r" (err) \ + : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ + : "memory") + +/* + * Use XRSTORS to restore context if it is enabled. XRSTORS supports compact + * XSAVE area format. + */ +#define XSTATE_XRESTORE(st, lmask, hmask, err) \ + asm volatile(ALTERNATIVE(XRSTOR, \ + XRSTORS, X86_FEATURE_XSAVES) \ + "\n" \ + "xor %[err], %[err]\n" \ + "3:\n" \ + ".pushsection .fixup,\"ax\"\n" \ + "4: movl $-2, %[err]\n" \ + "jmp 3b\n" \ + ".popsection\n" \ + _ASM_EXTABLE(661b, 4b) \ + : [err] "=r" (err) \ + : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ + : "memory") /* * This function is called only during boot time when x86 caps are not set @@ -246,22 +296,14 @@ static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate) u64 mask = -1; u32 lmask = mask; u32 hmask = mask >> 32; - int err = 0; + int err; WARN_ON(system_state != SYSTEM_BOOTING); - if (boot_cpu_has(X86_FEATURE_XSAVES)) - asm volatile("1:"XSAVES"\n\t" - "2:\n\t" - xstate_fault(err) - : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err) - : "memory"); + if (static_cpu_has_safe(X86_FEATURE_XSAVES)) + XSTATE_OP(XSAVES, xstate, lmask, hmask, err); else - asm volatile("1:"XSAVE"\n\t" - "2:\n\t" - xstate_fault(err) - : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err) - : "memory"); + XSTATE_OP(XSAVE, xstate, lmask, hmask, err); /* We should never fault when copying to a kernel buffer: */ WARN_ON_FPU(err); @@ -276,22 +318,14 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate) u64 mask = -1; u32 lmask = mask; u32 hmask = mask >> 32; - int err = 0; + int err; WARN_ON(system_state != SYSTEM_BOOTING); - if (boot_cpu_has(X86_FEATURE_XSAVES)) - asm volatile("1:"XRSTORS"\n\t" - "2:\n\t" - xstate_fault(err) - : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err) - : "memory"); + if (static_cpu_has_safe(X86_FEATURE_XSAVES)) + XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); else - asm volatile("1:"XRSTOR"\n\t" - "2:\n\t" - xstate_fault(err) - : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err) - : "memory"); + XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); /* We should never fault when copying from a kernel buffer: */ WARN_ON_FPU(err); @@ -305,33 +339,11 @@ static inline void copy_xregs_to_kernel(struct xregs_state *xstate) u64 mask = -1; u32 lmask = mask; u32 hmask = mask >> 32; - int err = 0; + int err; WARN_ON(!alternatives_patched); - /* - * If xsaves is enabled, xsaves replaces xsaveopt because - * it supports compact format and supervisor states in addition to - * modified optimization in xsaveopt. - * - * Otherwise, if xsaveopt is enabled, xsaveopt replaces xsave - * because xsaveopt supports modified optimization which is not - * supported by xsave. - * - * If none of xsaves and xsaveopt is enabled, use xsave. - */ - alternative_input_2( - "1:"XSAVE, - XSAVEOPT, - X86_FEATURE_XSAVEOPT, - XSAVES, - X86_FEATURE_XSAVES, - [xstate] "D" (xstate), "a" (lmask), "d" (hmask) : - "memory"); - asm volatile("2:\n\t" - xstate_fault(err) - : "0" (err) - : "memory"); + XSTATE_XSAVE(xstate, lmask, hmask, err); /* We should never fault when copying to a kernel buffer: */ WARN_ON_FPU(err); @@ -344,23 +356,9 @@ static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask) { u32 lmask = mask; u32 hmask = mask >> 32; - int err = 0; + int err; - /* - * Use xrstors to restore context if it is enabled. xrstors supports - * compacted format of xsave area which is not supported by xrstor. - */ - alternative_input( - "1: " XRSTOR, - XRSTORS, - X86_FEATURE_XSAVES, - "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask) - : "memory"); - - asm volatile("2:\n" - xstate_fault(err) - : "0" (err) - : "memory"); + XSTATE_XRESTORE(xstate, lmask, hmask, err); /* We should never fault when copying from a kernel buffer: */ WARN_ON_FPU(err); @@ -388,12 +386,10 @@ static inline int copy_xregs_to_user(struct xregs_state __user *buf) if (unlikely(err)) return -EFAULT; - __asm__ __volatile__(ASM_STAC "\n" - "1:"XSAVE"\n" - "2: " ASM_CLAC "\n" - xstate_fault(err) - : "D" (buf), "a" (-1), "d" (-1), "0" (err) - : "memory"); + stac(); + XSTATE_OP(XSAVE, buf, -1, -1, err); + clac(); + return err; } @@ -405,14 +401,12 @@ static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask) struct xregs_state *xstate = ((__force struct xregs_state *)buf); u32 lmask = mask; u32 hmask = mask >> 32; - int err = 0; - - __asm__ __volatile__(ASM_STAC "\n" - "1:"XRSTOR"\n" - "2: " ASM_CLAC "\n" - xstate_fault(err) - : "D" (xstate), "a" (lmask), "d" (hmask), "0" (err) - : "memory"); /* memory required? */ + int err; + + stac(); + XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); + clac(); + return err; } diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 3a6c89b70307..af30fdeb140d 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -20,15 +20,16 @@ /* Supported features which support lazy state saving */ #define XFEATURE_MASK_LAZY (XFEATURE_MASK_FP | \ - XFEATURE_MASK_SSE | \ + XFEATURE_MASK_SSE) + +/* Supported features which require eager state saving */ +#define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | \ + XFEATURE_MASK_BNDCSR | \ XFEATURE_MASK_YMM | \ - XFEATURE_MASK_OPMASK | \ + XFEATURE_MASK_OPMASK | \ XFEATURE_MASK_ZMM_Hi256 | \ XFEATURE_MASK_Hi16_ZMM) -/* Supported features which require eager state saving */ -#define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR) - /* All currently supported features */ #define XCNTXT_MASK (XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER) diff --git a/arch/x86/include/asm/intel_pt.h b/arch/x86/include/asm/intel_pt.h new file mode 100644 index 000000000000..e1a411786bf5 --- /dev/null +++ b/arch/x86/include/asm/intel_pt.h @@ -0,0 +1,10 @@ +#ifndef _ASM_X86_INTEL_PT_H +#define _ASM_X86_INTEL_PT_H + +#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) +void cpu_emergency_stop_pt(void); +#else +static inline void cpu_emergency_stop_pt(void) {} +#endif + +#endif /* _ASM_X86_INTEL_PT_H */ diff --git a/arch/x86/include/asm/iosf_mbi.h b/arch/x86/include/asm/iosf_mbi.h index b72ad0faa6c5..b41ee164930a 100644 --- a/arch/x86/include/asm/iosf_mbi.h +++ b/arch/x86/include/asm/iosf_mbi.h @@ -1,5 +1,5 @@ /* - * iosf_mbi.h: Intel OnChip System Fabric MailBox access support + * Intel OnChip System Fabric MailBox access support */ #ifndef IOSF_MBI_SYMS_H @@ -16,6 +16,18 @@ #define MBI_MASK_LO 0x000000FF #define MBI_ENABLE 0xF0 +/* IOSF SB read/write opcodes */ +#define MBI_MMIO_READ 0x00 +#define MBI_MMIO_WRITE 0x01 +#define MBI_CFG_READ 0x04 +#define MBI_CFG_WRITE 0x05 +#define MBI_CR_READ 0x06 +#define MBI_CR_WRITE 0x07 +#define MBI_REG_READ 0x10 +#define MBI_REG_WRITE 0x11 +#define MBI_ESRAM_READ 0x12 +#define MBI_ESRAM_WRITE 0x13 + /* Baytrail available units */ #define BT_MBI_UNIT_AUNIT 0x00 #define BT_MBI_UNIT_SMC 0x01 @@ -28,50 +40,13 @@ #define BT_MBI_UNIT_SATA 0xA3 #define BT_MBI_UNIT_PCIE 0xA6 -/* Baytrail read/write opcodes */ -#define BT_MBI_AUNIT_READ 0x10 -#define BT_MBI_AUNIT_WRITE 0x11 -#define BT_MBI_SMC_READ 0x10 -#define BT_MBI_SMC_WRITE 0x11 -#define BT_MBI_CPU_READ 0x10 -#define BT_MBI_CPU_WRITE 0x11 -#define BT_MBI_BUNIT_READ 0x10 -#define BT_MBI_BUNIT_WRITE 0x11 -#define BT_MBI_PMC_READ 0x06 -#define BT_MBI_PMC_WRITE 0x07 -#define BT_MBI_GFX_READ 0x00 -#define BT_MBI_GFX_WRITE 0x01 -#define BT_MBI_SMIO_READ 0x06 -#define BT_MBI_SMIO_WRITE 0x07 -#define BT_MBI_USB_READ 0x06 -#define BT_MBI_USB_WRITE 0x07 -#define BT_MBI_SATA_READ 0x00 -#define BT_MBI_SATA_WRITE 0x01 -#define BT_MBI_PCIE_READ 0x00 -#define BT_MBI_PCIE_WRITE 0x01 - /* Quark available units */ #define QRK_MBI_UNIT_HBA 0x00 #define QRK_MBI_UNIT_HB 0x03 #define QRK_MBI_UNIT_RMU 0x04 #define QRK_MBI_UNIT_MM 0x05 -#define QRK_MBI_UNIT_MMESRAM 0x05 #define QRK_MBI_UNIT_SOC 0x31 -/* Quark read/write opcodes */ -#define QRK_MBI_HBA_READ 0x10 -#define QRK_MBI_HBA_WRITE 0x11 -#define QRK_MBI_HB_READ 0x10 -#define QRK_MBI_HB_WRITE 0x11 -#define QRK_MBI_RMU_READ 0x10 -#define QRK_MBI_RMU_WRITE 0x11 -#define QRK_MBI_MM_READ 0x10 -#define QRK_MBI_MM_WRITE 0x11 -#define QRK_MBI_MMESRAM_READ 0x12 -#define QRK_MBI_MMESRAM_WRITE 0x13 -#define QRK_MBI_SOC_READ 0x06 -#define QRK_MBI_SOC_WRITE 0x07 - #if IS_ENABLED(CONFIG_IOSF_MBI) bool iosf_mbi_available(void); diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h index 615fa9061b57..cfc9a0d2d07c 100644 --- a/arch/x86/include/asm/ipi.h +++ b/arch/x86/include/asm/ipi.h @@ -119,6 +119,8 @@ static inline void native_apic_mem_write(APIC_ICR, cfg); } +extern void default_send_IPI_single(int cpu, int vector); +extern void default_send_IPI_single_phys(int cpu, int vector); extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector); extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 5daeca3d0f9e..adc54c12cbd1 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -1,12 +1,18 @@ #ifndef _ASM_X86_JUMP_LABEL_H #define _ASM_X86_JUMP_LABEL_H -#ifndef __ASSEMBLY__ - -#include <linux/stringify.h> -#include <linux/types.h> -#include <asm/nops.h> -#include <asm/asm.h> +#ifndef HAVE_JUMP_LABEL +/* + * For better or for worse, if jump labels (the gcc extension) are missing, + * then the entire static branch patching infrastructure is compiled out. + * If that happens, the code in here will malfunction. Raise a compiler + * error instead. + * + * In theory, jump labels and the static branch patching infrastructure + * could be decoupled to fix this. + */ +#error asm/jump_label.h included on a non-jump-label kernel +#endif #define JUMP_LABEL_NOP_SIZE 5 @@ -16,6 +22,14 @@ # define STATIC_KEY_INIT_NOP GENERIC_NOP5_ATOMIC #endif +#include <asm/asm.h> +#include <asm/nops.h> + +#ifndef __ASSEMBLY__ + +#include <linux/stringify.h> +#include <linux/types.h> + static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm_volatile_goto("1:" @@ -59,5 +73,40 @@ struct jump_entry { jump_label_t key; }; -#endif /* __ASSEMBLY__ */ +#else /* __ASSEMBLY__ */ + +.macro STATIC_JUMP_IF_TRUE target, key, def +.Lstatic_jump_\@: + .if \def + /* Equivalent to "jmp.d32 \target" */ + .byte 0xe9 + .long \target - .Lstatic_jump_after_\@ +.Lstatic_jump_after_\@: + .else + .byte STATIC_KEY_INIT_NOP + .endif + .pushsection __jump_table, "aw" + _ASM_ALIGN + _ASM_PTR .Lstatic_jump_\@, \target, \key + .popsection +.endm + +.macro STATIC_JUMP_IF_FALSE target, key, def +.Lstatic_jump_\@: + .if \def + .byte STATIC_KEY_INIT_NOP + .else + /* Equivalent to "jmp.d32 \target" */ + .byte 0xe9 + .long \target - .Lstatic_jump_after_\@ +.Lstatic_jump_after_\@: + .endif + .pushsection __jump_table, "aw" + _ASM_ALIGN + _ASM_PTR .Lstatic_jump_\@, \target, \key + 1 + .popsection +.endm + +#endif /* __ASSEMBLY__ */ + #endif diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h index 3bbc07a57a31..73d0c9b92087 100644 --- a/arch/x86/include/asm/lguest.h +++ b/arch/x86/include/asm/lguest.h @@ -12,7 +12,9 @@ #define GUEST_PL 1 /* Page for Switcher text itself, then two pages per cpu */ -#define TOTAL_SWITCHER_PAGES (1 + 2 * nr_cpu_ids) +#define SWITCHER_TEXT_PAGES (1) +#define SWITCHER_STACK_PAGES (2 * nr_cpu_ids) +#define TOTAL_SWITCHER_PAGES (SWITCHER_TEXT_PAGES + SWITCHER_STACK_PAGES) /* Where we map the Switcher, in both Host and Guest. */ extern unsigned long switcher_addr; diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 34e62b1dcfce..1e1b07a5a738 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -1,6 +1,7 @@ #ifndef _ASM_X86_MICROCODE_H #define _ASM_X86_MICROCODE_H +#include <asm/cpu.h> #include <linux/earlycpio.h> #define native_rdmsr(msr, val1, val2) \ @@ -95,14 +96,14 @@ static inline void __exit exit_amd_microcode(void) {} /* * In early loading microcode phase on BSP, boot_cpu_data is not set up yet. - * x86_vendor() gets vendor id for BSP. + * x86_cpuid_vendor() gets vendor id for BSP. * * In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify - * coding, we still use x86_vendor() to get vendor id for AP. + * coding, we still use x86_cpuid_vendor() to get vendor id for AP. * - * x86_vendor() gets vendor information directly from CPUID. + * x86_cpuid_vendor() gets vendor information directly from CPUID. */ -static inline int x86_vendor(void) +static inline int x86_cpuid_vendor(void) { u32 eax = 0x00000000; u32 ebx, ecx = 0, edx; @@ -118,40 +119,14 @@ static inline int x86_vendor(void) return X86_VENDOR_UNKNOWN; } -static inline unsigned int __x86_family(unsigned int sig) -{ - unsigned int x86; - - x86 = (sig >> 8) & 0xf; - - if (x86 == 0xf) - x86 += (sig >> 20) & 0xff; - - return x86; -} - -static inline unsigned int x86_family(void) +static inline unsigned int x86_cpuid_family(void) { u32 eax = 0x00000001; u32 ebx, ecx = 0, edx; native_cpuid(&eax, &ebx, &ecx, &edx); - return __x86_family(eax); -} - -static inline unsigned int x86_model(unsigned int sig) -{ - unsigned int x86, model; - - x86 = __x86_family(sig); - - model = (sig >> 4) & 0xf; - - if (x86 == 0x6 || x86 == 0xf) - model += ((sig >> 16) & 0xf) << 4; - - return model; + return x86_family(eax); } #ifdef CONFIG_MICROCODE diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 379cd3658799..bfd9b2a35a0b 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -116,8 +116,36 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, #endif cpumask_set_cpu(cpu, mm_cpumask(next)); - /* Re-load page tables */ + /* + * Re-load page tables. + * + * This logic has an ordering constraint: + * + * CPU 0: Write to a PTE for 'next' + * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI. + * CPU 1: set bit 1 in next's mm_cpumask + * CPU 1: load from the PTE that CPU 0 writes (implicit) + * + * We need to prevent an outcome in which CPU 1 observes + * the new PTE value and CPU 0 observes bit 1 clear in + * mm_cpumask. (If that occurs, then the IPI will never + * be sent, and CPU 0's TLB will contain a stale entry.) + * + * The bad outcome can occur if either CPU's load is + * reordered before that CPU's store, so both CPUs must + * execute full barriers to prevent this from happening. + * + * Thus, switch_mm needs a full barrier between the + * store to mm_cpumask and any operation that could load + * from next->pgd. TLB fills are special and can happen + * due to instruction fetches or for no reason at all, + * and neither LOCK nor MFENCE orders them. + * Fortunately, load_cr3() is serializing and gives the + * ordering guarantee we need. + * + */ load_cr3(next->pgd); + trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); /* Stop flush ipis for the previous mm */ @@ -156,10 +184,14 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, * schedule, protecting us from simultaneous changes. */ cpumask_set_cpu(cpu, mm_cpumask(next)); + /* * We were in lazy tlb mode and leave_mm disabled * tlb flush IPI delivery. We must reload CR3 * to make sure to use no freed page tables. + * + * As above, load_cr3() is serializing and orders TLB + * fills with respect to the mm_cpumask write. */ load_cr3(next->pgd); trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); diff --git a/arch/x86/include/asm/msi.h b/arch/x86/include/asm/msi.h index 93724cc62177..eb4b09b41df5 100644 --- a/arch/x86/include/asm/msi.h +++ b/arch/x86/include/asm/msi.h @@ -1,7 +1,13 @@ #ifndef _ASM_X86_MSI_H #define _ASM_X86_MSI_H #include <asm/hw_irq.h> +#include <asm/irqdomain.h> typedef struct irq_alloc_info msi_alloc_info_t; +int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec, + msi_alloc_info_t *arg); + +void pci_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc); + #endif /* _ASM_X86_MSI_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 690b4027e17c..b05402ef3b84 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -321,6 +321,7 @@ #define MSR_F15H_PERF_CTR 0xc0010201 #define MSR_F15H_NB_PERF_CTL 0xc0010240 #define MSR_F15H_NB_PERF_CTR 0xc0010241 +#define MSR_F15H_IC_CFG 0xc0011021 /* Fam 10h MSRs */ #define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 diff --git a/arch/x86/include/asm/msr-trace.h b/arch/x86/include/asm/msr-trace.h new file mode 100644 index 000000000000..7567225747d8 --- /dev/null +++ b/arch/x86/include/asm/msr-trace.h @@ -0,0 +1,57 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM msr + +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE msr-trace + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH asm/ + +#if !defined(_TRACE_MSR_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MSR_H + +#include <linux/tracepoint.h> + +/* + * Tracing for x86 model specific registers. Directly maps to the + * RDMSR/WRMSR instructions. + */ + +DECLARE_EVENT_CLASS(msr_trace_class, + TP_PROTO(unsigned msr, u64 val, int failed), + TP_ARGS(msr, val, failed), + TP_STRUCT__entry( + __field( unsigned, msr ) + __field( u64, val ) + __field( int, failed ) + ), + TP_fast_assign( + __entry->msr = msr; + __entry->val = val; + __entry->failed = failed; + ), + TP_printk("%x, value %llx%s", + __entry->msr, + __entry->val, + __entry->failed ? " #GP" : "") +); + +DEFINE_EVENT(msr_trace_class, read_msr, + TP_PROTO(unsigned msr, u64 val, int failed), + TP_ARGS(msr, val, failed) +); + +DEFINE_EVENT(msr_trace_class, write_msr, + TP_PROTO(unsigned msr, u64 val, int failed), + TP_ARGS(msr, val, failed) +); + +DEFINE_EVENT(msr_trace_class, rdpmc, + TP_PROTO(unsigned msr, u64 val, int failed), + TP_ARGS(msr, val, failed) +); + +#endif /* _TRACE_MSR_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 77d8b284e4a7..93fb7c1cffda 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -32,6 +32,16 @@ struct msr_regs_info { int err; }; +struct saved_msr { + bool valid; + struct msr_info info; +}; + +struct saved_msrs { + unsigned int num; + struct saved_msr *array; +}; + static inline unsigned long long native_read_tscp(unsigned int *aux) { unsigned long low, high; @@ -57,11 +67,34 @@ static inline unsigned long long native_read_tscp(unsigned int *aux) #define EAX_EDX_RET(val, low, high) "=A" (val) #endif +#ifdef CONFIG_TRACEPOINTS +/* + * Be very careful with includes. This header is prone to include loops. + */ +#include <asm/atomic.h> +#include <linux/tracepoint-defs.h> + +extern struct tracepoint __tracepoint_read_msr; +extern struct tracepoint __tracepoint_write_msr; +extern struct tracepoint __tracepoint_rdpmc; +#define msr_tracepoint_active(t) static_key_false(&(t).key) +extern void do_trace_write_msr(unsigned msr, u64 val, int failed); +extern void do_trace_read_msr(unsigned msr, u64 val, int failed); +extern void do_trace_rdpmc(unsigned msr, u64 val, int failed); +#else +#define msr_tracepoint_active(t) false +static inline void do_trace_write_msr(unsigned msr, u64 val, int failed) {} +static inline void do_trace_read_msr(unsigned msr, u64 val, int failed) {} +static inline void do_trace_rdpmc(unsigned msr, u64 val, int failed) {} +#endif + static inline unsigned long long native_read_msr(unsigned int msr) { DECLARE_ARGS(val, low, high); asm volatile("rdmsr" : EAX_EDX_RET(val, low, high) : "c" (msr)); + if (msr_tracepoint_active(__tracepoint_read_msr)) + do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), 0); return EAX_EDX_VAL(val, low, high); } @@ -78,6 +111,8 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr, _ASM_EXTABLE(2b, 3b) : [err] "=r" (*err), EAX_EDX_RET(val, low, high) : "c" (msr), [fault] "i" (-EIO)); + if (msr_tracepoint_active(__tracepoint_read_msr)) + do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), *err); return EAX_EDX_VAL(val, low, high); } @@ -85,6 +120,8 @@ static inline void native_write_msr(unsigned int msr, unsigned low, unsigned high) { asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory"); + if (msr_tracepoint_active(__tracepoint_read_msr)) + do_trace_write_msr(msr, ((u64)high << 32 | low), 0); } /* Can be uninlined because referenced by paravirt */ @@ -102,6 +139,8 @@ notrace static inline int native_write_msr_safe(unsigned int msr, : "c" (msr), "0" (low), "d" (high), [fault] "i" (-EIO) : "memory"); + if (msr_tracepoint_active(__tracepoint_read_msr)) + do_trace_write_msr(msr, ((u64)high << 32 | low), err); return err; } @@ -160,6 +199,8 @@ static inline unsigned long long native_read_pmc(int counter) DECLARE_ARGS(val, low, high); asm volatile("rdpmc" : EAX_EDX_RET(val, low, high) : "c" (counter)); + if (msr_tracepoint_active(__tracepoint_rdpmc)) + do_trace_rdpmc(counter, EAX_EDX_VAL(val, low, high), 0); return EAX_EDX_VAL(val, low, high); } @@ -190,7 +231,7 @@ static inline void wrmsr(unsigned msr, unsigned low, unsigned high) static inline void wrmsrl(unsigned msr, u64 val) { - native_write_msr(msr, (u32)val, (u32)(val >> 32)); + native_write_msr(msr, (u32)(val & 0xffffffffULL), (u32)(val >> 32)); } /* wrmsr with exception handling */ diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index cc071c6f7d4d..7bd0099384ca 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -5,9 +5,9 @@ #include <linux/types.h> /* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT 12 -#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) +#define PAGE_SHIFT 12 +#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) #define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT) #define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1)) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 10d0596433f8..f6192502149e 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -19,6 +19,12 @@ static inline int paravirt_enabled(void) return pv_info.paravirt_enabled; } +static inline int paravirt_has_feature(unsigned int feature) +{ + WARN_ON_ONCE(!pv_info.paravirt_enabled); + return (pv_info.features & feature); +} + static inline void load_sp0(struct tss_struct *tss, struct thread_struct *thread) { @@ -285,15 +291,6 @@ static inline void slow_down_io(void) #endif } -#ifdef CONFIG_SMP -static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip, - unsigned long start_esp) -{ - PVOP_VCALL3(pv_apic_ops.startup_ipi_hook, - phys_apicid, start_eip, start_esp); -} -#endif - static inline void paravirt_activate_mm(struct mm_struct *prev, struct mm_struct *next) { @@ -375,23 +372,6 @@ static inline void pte_update(struct mm_struct *mm, unsigned long addr, { PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep); } -static inline void pmd_update(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp) -{ - PVOP_VCALL3(pv_mmu_ops.pmd_update, mm, addr, pmdp); -} - -static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) -{ - PVOP_VCALL3(pv_mmu_ops.pte_update_defer, mm, addr, ptep); -} - -static inline void pmd_update_defer(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp) -{ - PVOP_VCALL3(pv_mmu_ops.pmd_update_defer, mm, addr, pmdp); -} static inline pte_t __pte(pteval_t val) { @@ -922,23 +902,11 @@ extern void default_banner(void); call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) -#define USERGS_SYSRET32 \ - PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \ - CLBR_NONE, \ - jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret32)) - #ifdef CONFIG_X86_32 #define GET_CR0_INTO_EAX \ push %ecx; push %edx; \ call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \ pop %edx; pop %ecx - -#define ENABLE_INTERRUPTS_SYSEXIT \ - PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \ - CLBR_NONE, \ - jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit)) - - #else /* !CONFIG_X86_32 */ /* diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 31247b5bff7c..77db5616a473 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -70,9 +70,14 @@ struct pv_info { #endif int paravirt_enabled; + unsigned int features; /* valid only if paravirt_enabled is set */ const char *name; }; +#define paravirt_has(x) paravirt_has_feature(PV_SUPPORTED_##x) +/* Supported features */ +#define PV_SUPPORTED_RTC (1<<0) + struct pv_init_ops { /* * Patch may replace one of the defined code sequences with @@ -157,15 +162,6 @@ struct pv_cpu_ops { u64 (*read_pmc)(int counter); -#ifdef CONFIG_X86_32 - /* - * Atomically enable interrupts and return to userspace. This - * is only used in 32-bit kernels. 64-bit kernels use - * usergs_sysret32 instead. - */ - void (*irq_enable_sysexit)(void); -#endif - /* * Switch to usermode gs and return to 64-bit usermode using * sysret. Only used in 64-bit kernels to return to 64-bit @@ -174,14 +170,6 @@ struct pv_cpu_ops { */ void (*usergs_sysret64)(void); - /* - * Switch to usermode gs and return to 32-bit usermode using - * sysret. Used to return to 32-on-64 compat processes. - * Other usermode register state, including %esp, must already - * be restored. - */ - void (*usergs_sysret32)(void); - /* Normal iret. Jump to this with the standard iret stack frame set up. */ void (*iret)(void); @@ -215,14 +203,6 @@ struct pv_irq_ops { #endif }; -struct pv_apic_ops { -#ifdef CONFIG_X86_LOCAL_APIC - void (*startup_ipi_hook)(int phys_apicid, - unsigned long start_eip, - unsigned long start_esp); -#endif -}; - struct pv_mmu_ops { unsigned long (*read_cr2)(void); void (*write_cr2)(unsigned long); @@ -274,12 +254,6 @@ struct pv_mmu_ops { pmd_t *pmdp, pmd_t pmdval); void (*pte_update)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); - void (*pte_update_defer)(struct mm_struct *mm, - unsigned long addr, pte_t *ptep); - void (*pmd_update)(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp); - void (*pmd_update_defer)(struct mm_struct *mm, - unsigned long addr, pmd_t *pmdp); pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); @@ -354,7 +328,6 @@ struct paravirt_patch_template { struct pv_time_ops pv_time_ops; struct pv_cpu_ops pv_cpu_ops; struct pv_irq_ops pv_irq_ops; - struct pv_apic_ops pv_apic_ops; struct pv_mmu_ops pv_mmu_ops; struct pv_lock_ops pv_lock_ops; }; @@ -364,7 +337,6 @@ extern struct pv_init_ops pv_init_ops; extern struct pv_time_ops pv_time_ops; extern struct pv_cpu_ops pv_cpu_ops; extern struct pv_irq_ops pv_irq_ops; -extern struct pv_apic_ops pv_apic_ops; extern struct pv_mmu_ops pv_mmu_ops; extern struct pv_lock_ops pv_lock_ops; @@ -402,10 +374,8 @@ extern struct pv_lock_ops pv_lock_ops; __visible extern const char start_##ops##_##name[], end_##ops##_##name[]; \ asm(NATIVE_LABEL("start_", ops, name) code NATIVE_LABEL("end_", ops, name)) -unsigned paravirt_patch_nop(void); unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len); unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len); -unsigned paravirt_patch_ignore(unsigned len); unsigned paravirt_patch_call(void *insnbuf, const void *target, u16 tgt_clobbers, unsigned long addr, u16 site_clobbers, diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 6ec0c8b2e9df..d3eee663c41f 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -69,9 +69,6 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page); #define pmd_clear(pmd) native_pmd_clear(pmd) #define pte_update(mm, addr, ptep) do { } while (0) -#define pte_update_defer(mm, addr, ptep) do { } while (0) -#define pmd_update(mm, addr, ptep) do { } while (0) -#define pmd_update_defer(mm, addr, ptep) do { } while (0) #define pgd_val(x) native_pgd_val(x) #define __pgd(x) native_make_pgd(x) @@ -731,14 +728,9 @@ static inline void native_set_pmd_at(struct mm_struct *mm, unsigned long addr, * updates should either be sets, clears, or set_pte_atomic for P->P * transitions, which means this hook should only be called for user PTEs. * This hook implies a P->P protection or access change has taken place, which - * requires a subsequent TLB flush. The notification can optionally be delayed - * until the TLB flush event by using the pte_update_defer form of the - * interface, but care must be taken to assure that the flush happens while - * still holding the same page table lock so that the shadow and primary pages - * do not become out of sync on SMP. + * requires a subsequent TLB flush. */ #define pte_update(mm, addr, ptep) do { } while (0) -#define pte_update_defer(mm, addr, ptep) do { } while (0) #endif /* @@ -830,9 +822,7 @@ static inline int pmd_write(pmd_t pmd) static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - pmd_t pmd = native_pmdp_get_and_clear(pmdp); - pmd_update(mm, addr, pmdp); - return pmd; + return native_pmdp_get_and_clear(pmdp); } #define __HAVE_ARCH_PMDP_SET_WRPROTECT @@ -840,7 +830,6 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp); - pmd_update(mm, addr, pmdp); } /* diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 67522256c7ff..2d5a50cb61a2 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -472,6 +472,7 @@ static inline unsigned long current_top_of_stack(void) #else #define __cpuid native_cpuid #define paravirt_enabled() 0 +#define paravirt_has(x) 0 static inline void load_sp0(struct tss_struct *tss, struct thread_struct *thread) diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 7a6bed5c08bc..fdcc04020636 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -4,6 +4,15 @@ #include <linux/clocksource.h> #include <asm/pvclock-abi.h> +#ifdef CONFIG_KVM_GUEST +extern struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void); +#else +static inline struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void) +{ + return NULL; +} +#endif + /* some helper functions for xen and kvm pv clock sources */ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src); @@ -91,10 +100,5 @@ struct pvclock_vsyscall_time_info { } __attribute__((__aligned__(SMP_CACHE_BYTES))); #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) -#define PVCLOCK_VSYSCALL_NR_PAGES (((NR_CPUS-1)/(PAGE_SIZE/PVTI_SIZE))+1) - -int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i, - int size); -struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu); #endif /* _ASM_X86_PVCLOCK_H */ diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h index b002e711ba88..9f92c180ed2f 100644 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -1,6 +1,65 @@ #ifndef __ASM_QSPINLOCK_PARAVIRT_H #define __ASM_QSPINLOCK_PARAVIRT_H +/* + * For x86-64, PV_CALLEE_SAVE_REGS_THUNK() saves and restores 8 64-bit + * registers. For i386, however, only 1 32-bit register needs to be saved + * and restored. So an optimized version of __pv_queued_spin_unlock() is + * hand-coded for 64-bit, but it isn't worthwhile to do it for 32-bit. + */ +#ifdef CONFIG_64BIT + +PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath); +#define __pv_queued_spin_unlock __pv_queued_spin_unlock +#define PV_UNLOCK "__raw_callee_save___pv_queued_spin_unlock" +#define PV_UNLOCK_SLOWPATH "__raw_callee_save___pv_queued_spin_unlock_slowpath" + +/* + * Optimized assembly version of __raw_callee_save___pv_queued_spin_unlock + * which combines the registers saving trunk and the body of the following + * C code: + * + * void __pv_queued_spin_unlock(struct qspinlock *lock) + * { + * struct __qspinlock *l = (void *)lock; + * u8 lockval = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0); + * + * if (likely(lockval == _Q_LOCKED_VAL)) + * return; + * pv_queued_spin_unlock_slowpath(lock, lockval); + * } + * + * For x86-64, + * rdi = lock (first argument) + * rsi = lockval (second argument) + * rdx = internal variable (set to 0) + */ +asm (".pushsection .text;" + ".globl " PV_UNLOCK ";" + ".align 4,0x90;" + PV_UNLOCK ": " + "push %rdx;" + "mov $0x1,%eax;" + "xor %edx,%edx;" + "lock cmpxchg %dl,(%rdi);" + "cmp $0x1,%al;" + "jne .slowpath;" + "pop %rdx;" + "ret;" + ".slowpath: " + "push %rsi;" + "movzbl %al,%esi;" + "call " PV_UNLOCK_SLOWPATH ";" + "pop %rsi;" + "pop %rdx;" + "ret;" + ".size " PV_UNLOCK ", .-" PV_UNLOCK ";" + ".popsection"); + +#else /* CONFIG_64BIT */ + +extern void __pv_queued_spin_unlock(struct qspinlock *lock); PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock); +#endif /* CONFIG_64BIT */ #endif diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index a82c4f1b4d83..2cb1cc253d51 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h @@ -25,5 +25,6 @@ void __noreturn machine_real_restart(unsigned int type); typedef void (*nmi_shootdown_cb)(int, struct pt_regs*); void nmi_shootdown_cpus(nmi_shootdown_cb callback); +void run_crash_ipi_callback(struct pt_regs *regs); #endif /* _ASM_X86_REBOOT_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 222a6a3ca2b5..dfcf0727623b 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -21,15 +21,6 @@ extern int smp_num_siblings; extern unsigned int num_processors; -static inline bool cpu_has_ht_siblings(void) -{ - bool has_siblings = false; -#ifdef CONFIG_SMP - has_siblings = cpu_has_ht && smp_num_siblings > 1; -#endif - return has_siblings; -} - DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); /* cpus sharing the last level cache: */ @@ -74,9 +65,6 @@ struct smp_ops { extern void set_cpu_sibling_map(int cpu); #ifdef CONFIG_SMP -#ifndef CONFIG_PARAVIRT -#define startup_ipi_hook(phys_apicid, start_eip, start_esp) do { } while (0) -#endif extern struct smp_ops smp_ops; static inline void smp_send_stop(void) diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h index d1793f06854d..8e9dbe7b73a1 100644 --- a/arch/x86/include/asm/suspend_32.h +++ b/arch/x86/include/asm/suspend_32.h @@ -15,6 +15,7 @@ struct saved_context { unsigned long cr0, cr2, cr3, cr4; u64 misc_enable; bool misc_enable_saved; + struct saved_msrs saved_msrs; struct desc_ptr gdt_desc; struct desc_ptr idt; u16 ldt; diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h index 7ebf0ebe4e68..6136a18152af 100644 --- a/arch/x86/include/asm/suspend_64.h +++ b/arch/x86/include/asm/suspend_64.h @@ -24,6 +24,7 @@ struct saved_context { unsigned long cr0, cr2, cr3, cr4, cr8; u64 misc_enable; bool misc_enable_saved; + struct saved_msrs saved_msrs; unsigned long efer; u16 gdt_pad; /* Unused */ struct desc_ptr gdt_desc; diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 09b1b0ab94b7..660458af425d 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -745,5 +745,14 @@ copy_to_user(void __user *to, const void *from, unsigned long n) #undef __copy_from_user_overflow #undef __copy_to_user_overflow +/* + * We rely on the nested NMI work to allow atomic faults from the NMI path; the + * nested NMI paths are careful to preserve CR2. + * + * Caller must use pagefault_enable/disable, or run in interrupt context, + * and also do a uaccess_ok() check + */ +#define __copy_from_user_nmi __copy_from_user_inatomic + #endif /* _ASM_X86_UACCESS_H */ diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index 756de9190aec..deabaf9759b6 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -22,6 +22,7 @@ struct vdso_image { long sym_vvar_page; long sym_hpet_page; + long sym_pvclock_page; long sym_VDSO32_NOTE_MASK; long sym___kernel_sigreturn; long sym___kernel_rt_sigreturn; diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index cd0fc0cc78bc..1ae89a2721d6 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -82,13 +82,11 @@ struct x86_init_paging { * struct x86_init_timers - platform specific timer setup * @setup_perpcu_clockev: set up the per cpu clock event device for the * boot cpu - * @tsc_pre_init: platform function called before TSC init * @timer_init: initialize the platform timer (default PIT/HPET) * @wallclock_init: init the wallclock device */ struct x86_init_timers { void (*setup_percpu_clockev)(void); - void (*tsc_pre_init)(void); void (*timer_init)(void); void (*wallclock_init)(void); }; diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 4c20dd333412..3bcdcc84259d 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -310,10 +310,10 @@ HYPERVISOR_mca(struct xen_mc *mc_op) } static inline int -HYPERVISOR_dom0_op(struct xen_platform_op *platform_op) +HYPERVISOR_platform_op(struct xen_platform_op *op) { - platform_op->interface_version = XENPF_INTERFACE_VERSION; - return _hypercall1(int, dom0_op, platform_op); + op->interface_version = XENPF_INTERFACE_VERSION; + return _hypercall1(int, platform_op, op); } static inline int diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h index 5a08bc8bff33..c54beb44c4c1 100644 --- a/arch/x86/include/asm/xor_32.h +++ b/arch/x86/include/asm/xor_32.h @@ -553,7 +553,7 @@ do { \ if (cpu_has_xmm) { \ xor_speed(&xor_block_pIII_sse); \ xor_speed(&xor_block_sse_pf64); \ - } else if (cpu_has_mmx) { \ + } else if (boot_cpu_has(X86_FEATURE_MMX)) { \ xor_speed(&xor_block_pII_mmx); \ xor_speed(&xor_block_p5_mmx); \ } else { \ diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index 03429da2fa80..2184943341bf 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -16,7 +16,7 @@ struct mce { __u8 cpuvendor; /* cpu vendor as encoded in system.h */ __u8 inject_flags; /* software inject flags */ __u8 severity; - __u8 usable_addr; + __u8 pad; __u32 cpuid; /* CPUID 1 EAX */ __u8 cs; /* code segment */ __u8 bank; /* machine check bank */ |