diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-03 13:22:39 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-03 13:22:39 -0700 |
commit | e606d81d2d9596ab2b4fd0dc052eea0485b7e8c2 (patch) | |
tree | 6c4d4d9077e10b8d53412fa5d171623b06c4b3c3 /arch/x86/include | |
parent | 12b7bcb43e6ea834ab2f5dc52d971e379a0ca109 (diff) | |
parent | b199ac6c4943aa0db246163bf6b483e2bb53431b (diff) | |
download | linux-e606d81d2d9596ab2b4fd0dc052eea0485b7e8c2.tar.bz2 |
Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RAS updates from Ingo Molnar:
"The main changes were:
- Lots of enhancements for AMD SMCA (Scalable MCA
features/extensions) systems: extract, decode and print more
hardware error information and add matching support on the
injection/testing side as well. (Yazen Ghannam)
- Various MCE handling improvements on modern Intel Xeons. (Tony
Luck)
- Plus misc fixes and enhancements"
* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits)
x86/RAS/mce_amd_inj: Remove debugfs dir recursively on exit
x86/RAS/mce_amd_inj: Fix signed wrap around when decrementing index 'i'
x86/RAS/mce_amd_inj: Fix some W= warnings
x86/MCE/AMD, EDAC: Handle reserved bank 4 on Fam17h properly
x86/mce/AMD: Extract the error address on SMCA systems
x86/mce, EDAC/mce_amd: Print MCA_SYND and MCA_IPID during MCE on SMCA systems
x86/mce/AMD: Save MCA_IPID in MCE struct on SMCA systems
x86/mce/AMD: Ensure the deferred error interrupt is of type APIC on SMCA systems
x86/mce/AMD: Update sysfs bank names for SMCA systems
x86/mce/AMD, EDAC/mce_amd: Define and use tables for known SMCA IP types
EDAC/mce_amd: Use SMCA prefix for error descriptions arrays
EDAC/mce_amd: Add missing SMCA error descriptions
x86/mce/AMD: Read MSRs on the CPU allocating the threshold blocks
x86/RAS: Add syndrome support to mce_amd_inj
EDAC/mce_amd: Print syndrome register value on SMCA systems
x86/mce: Add support for new MCA_SYND register
x86/mce/AMD: Use msr_ops.misc() in allocate_threshold_blocks()
x86/mce: Drop X86_FEATURE_MCE_RECOVERY and the related model string test
x86/mce: Improve memcpy_mcsafe()
x86/mce: Add PCI quirks to identify Xeons with machine check recovery
...
Diffstat (limited to 'arch/x86/include')
-rw-r--r-- | arch/x86/include/asm/cpufeatures.h | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/mce.h | 66 | ||||
-rw-r--r-- | arch/x86/include/asm/pmem.h | 5 | ||||
-rw-r--r-- | arch/x86/include/asm/string_64.h | 19 | ||||
-rw-r--r-- | arch/x86/include/uapi/asm/mce.h | 2 |
5 files changed, 57 insertions, 36 deletions
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 92a8308b96f6..1188bc849ee3 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -106,7 +106,6 @@ #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ #define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */ #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ -#define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 8bf766ef0e18..9bd7ff5ffbcc 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -40,9 +40,10 @@ #define MCI_STATUS_AR (1ULL<<55) /* Action required */ /* AMD-specific bits */ +#define MCI_STATUS_TCC (1ULL<<55) /* Task context corrupt */ +#define MCI_STATUS_SYNDV (1ULL<<53) /* synd reg. 
valid */ #define MCI_STATUS_DEFERRED (1ULL<<44) /* uncorrected error, deferred exception */ #define MCI_STATUS_POISON (1ULL<<43) /* access poisonous data */ -#define MCI_STATUS_TCC (1ULL<<55) /* Task context corrupt */ /* * McaX field if set indicates a given bank supports MCA extensions: @@ -110,6 +111,7 @@ #define MSR_AMD64_SMCA_MC0_MISC0 0xc0002003 #define MSR_AMD64_SMCA_MC0_CONFIG 0xc0002004 #define MSR_AMD64_SMCA_MC0_IPID 0xc0002005 +#define MSR_AMD64_SMCA_MC0_SYND 0xc0002006 #define MSR_AMD64_SMCA_MC0_DESTAT 0xc0002008 #define MSR_AMD64_SMCA_MC0_DEADDR 0xc0002009 #define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a @@ -119,6 +121,7 @@ #define MSR_AMD64_SMCA_MCx_MISC(x) (MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_CONFIG(x) (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_IPID(x) (MSR_AMD64_SMCA_MC0_IPID + 0x10*(x)) +#define MSR_AMD64_SMCA_MCx_SYND(x) (MSR_AMD64_SMCA_MC0_SYND + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_DESTAT(x) (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x))) @@ -334,44 +337,47 @@ extern void apei_mce_report_mem_error(int corrected, * Scalable MCA. 
*/ #ifdef CONFIG_X86_MCE_AMD -enum amd_ip_types { - SMCA_F17H_CORE = 0, /* Core errors */ - SMCA_DF, /* Data Fabric */ - SMCA_UMC, /* Unified Memory Controller */ - SMCA_PB, /* Parameter Block */ - SMCA_PSP, /* Platform Security Processor */ - SMCA_SMU, /* System Management Unit */ - N_AMD_IP_TYPES -}; - -struct amd_hwid { - const char *name; - unsigned int hwid; -}; - -extern struct amd_hwid amd_hwids[N_AMD_IP_TYPES]; -enum amd_core_mca_blocks { +/* These may be used by multiple smca_hwid_mcatypes */ +enum smca_bank_types { SMCA_LS = 0, /* Load Store */ SMCA_IF, /* Instruction Fetch */ - SMCA_L2_CACHE, /* L2 cache */ - SMCA_DE, /* Decoder unit */ - RES, /* Reserved */ - SMCA_EX, /* Execution unit */ + SMCA_L2_CACHE, /* L2 Cache */ + SMCA_DE, /* Decoder Unit */ + SMCA_EX, /* Execution Unit */ SMCA_FP, /* Floating Point */ - SMCA_L3_CACHE, /* L3 cache */ - N_CORE_MCA_BLOCKS + SMCA_L3_CACHE, /* L3 Cache */ + SMCA_CS, /* Coherent Slave */ + SMCA_PIE, /* Power, Interrupts, etc. */ + SMCA_UMC, /* Unified Memory Controller */ + SMCA_PB, /* Parameter Block */ + SMCA_PSP, /* Platform Security Processor */ + SMCA_SMU, /* System Management Unit */ + N_SMCA_BANK_TYPES }; -extern const char * const amd_core_mcablock_names[N_CORE_MCA_BLOCKS]; +struct smca_bank_name { + const char *name; /* Short name for sysfs */ + const char *long_name; /* Long name for pretty-printing */ +}; + +extern struct smca_bank_name smca_bank_names[N_SMCA_BANK_TYPES]; + +#define HWID_MCATYPE(hwid, mcatype) ((hwid << 16) | mcatype) -enum amd_df_mca_blocks { - SMCA_CS = 0, /* Coherent Slave */ - SMCA_PIE, /* Power management, Interrupts, etc */ - N_DF_BLOCKS +struct smca_hwid_mcatype { + unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */ + u32 hwid_mcatype; /* (hwid,mcatype) tuple */ + u32 xec_bitmap; /* Bitmap of valid ExtErrorCodes; current max is 21. 
*/ }; -extern const char * const amd_df_mcablock_names[N_DF_BLOCKS]; +struct smca_bank_info { + struct smca_hwid_mcatype *type; + u32 type_instance; +}; + +extern struct smca_bank_info smca_banks[MAX_NR_BANKS]; + #endif #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h index 643eba42d620..2c1ebeb4d737 100644 --- a/arch/x86/include/asm/pmem.h +++ b/arch/x86/include/asm/pmem.h @@ -46,10 +46,7 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n) { - if (static_cpu_has(X86_FEATURE_MCE_RECOVERY)) - return memcpy_mcsafe(dst, src, n); - memcpy(dst, src, n); - return 0; + return memcpy_mcsafe(dst, src, n); } /** diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 90dbbd9666d4..a164862d77e3 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -2,6 +2,7 @@ #define _ASM_X86_STRING_64_H #ifdef __KERNEL__ +#include <linux/jump_label.h> /* Written 2002 by Andi Kleen */ @@ -78,6 +79,9 @@ int strcmp(const char *cs, const char *ct); #define memset(s, c, n) __memset(s, c, n) #endif +__must_check int memcpy_mcsafe_unrolled(void *dst, const void *src, size_t cnt); +DECLARE_STATIC_KEY_FALSE(mcsafe_key); + /** * memcpy_mcsafe - copy memory with indication if a machine check happened * @@ -86,10 +90,23 @@ int strcmp(const char *cs, const char *ct); * @cnt: number of bytes to copy * * Low level memory copy function that catches machine checks + * We only call into the "safe" function on systems that can + * actually do machine check recovery. Everyone else can just + * use memcpy(). 
* * Return 0 for success, -EFAULT for fail */ -int memcpy_mcsafe(void *dst, const void *src, size_t cnt); +static __always_inline __must_check int +memcpy_mcsafe(void *dst, const void *src, size_t cnt) +{ +#ifdef CONFIG_X86_MCE + if (static_branch_unlikely(&mcsafe_key)) + return memcpy_mcsafe_unrolled(dst, src, cnt); + else +#endif + memcpy(dst, src, cnt); + return 0; +} #endif /* __KERNEL__ */ diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index 2184943341bf..69a6e07e3149 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -26,6 +26,8 @@ struct mce { __u32 socketid; /* CPU socket ID */ __u32 apicid; /* CPU initial apic ID */ __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ + __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */ + __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */ }; #define MCE_GET_RECORD_LEN _IOR('M', 1, int) |