From 2be64bfac71378e1aa8c20031a499bd55e391244 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 17 Sep 2010 19:11:47 +0200 Subject: EDAC, MCE: Select extended error code mask F15h enlarges the extended error code of an MCE to a 5-bit field (MCi_STATUS[20:16]). Add a mask variable which default 0xf is overridden on F15h. Signed-off-by: Borislav Petkov --- drivers/edac/mce_amd.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index c0181093b490..01853eed2019 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -5,6 +5,7 @@ static struct amd_decoder_ops *fam_ops; +static u8 xec_mask = 0xf; static u8 nb_err_cpumask = 0xf; static bool report_gart_errors; @@ -172,7 +173,7 @@ static bool f14h_dc_mce(u16 ec) static void amd_decode_dc_mce(struct mce *m) { u16 ec = m->status & 0xffff; - u8 xec = (m->status >> 16) & 0xf; + u8 xec = (m->status >> 16) & xec_mask; pr_emerg(HW_ERR "Data Cache Error: "); @@ -257,7 +258,7 @@ static bool f14h_ic_mce(u16 ec) static void amd_decode_ic_mce(struct mce *m) { u16 ec = m->status & 0xffff; - u8 xec = (m->status >> 16) & 0xf; + u8 xec = (m->status >> 16) & xec_mask; pr_emerg(HW_ERR "Instruction Cache Error: "); @@ -277,7 +278,7 @@ static void amd_decode_ic_mce(struct mce *m) static void amd_decode_bu_mce(struct mce *m) { u32 ec = m->status & 0xffff; - u32 xec = (m->status >> 16) & 0xf; + u32 xec = (m->status >> 16) & xec_mask; pr_emerg(HW_ERR "Bus Unit Error"); @@ -319,7 +320,7 @@ wrong_bu_mce: static void amd_decode_ls_mce(struct mce *m) { u16 ec = m->status & 0xffff; - u8 xec = (m->status >> 16) & 0xf; + u8 xec = (m->status >> 16) & xec_mask; if (boot_cpu_data.x86 == 0x14) { pr_emerg("You shouldn't be seeing an LS MCE on this cpu family," @@ -651,6 +652,10 @@ static int __init mce_amd_init(void) fam_ops->nb_mce = nb_noop_mce; break; + case 0x15: + xec_mask = 0x1f; + break; + default: printk(KERN_WARNING "Huh? What family is that: %d?!\n", boot_cpu_data.x86); -- cgit v1.2.3 From 25a4f8b05917f8137bfff8a3f8c6c8c1ac561208 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 17 Sep 2010 19:22:34 +0200 Subject: EDAC, MCE: Add F15h DC MCE decoder Add a decoder for F15h DC MCEs to support the new types of DC MCEs introduced by the BD microarchitecture. Signed-off-by: Borislav Petkov --- drivers/edac/mce_amd.c | 79 ++++++++++++++++++++++++++++++++++++++------------ drivers/edac/mce_amd.h | 2 +- 2 files changed, 62 insertions(+), 19 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 01853eed2019..12bae3b18e3d 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -75,7 +75,7 @@ static const char *f10h_nb_mce_desc[] = { "ECC Error in the Probe Filter directory" }; -static bool f12h_dc_mce(u16 ec) +static bool f12h_dc_mce(u16 ec, u8 xec) { bool ret = false; @@ -93,7 +93,7 @@ static bool f12h_dc_mce(u16 ec) return ret; } -static bool f10h_dc_mce(u16 ec) +static bool f10h_dc_mce(u16 ec, u8 xec) { u8 r4 = (ec >> 4) & 0xf; u8 ll = ec & 0x3; @@ -102,20 +102,20 @@ static bool f10h_dc_mce(u16 ec) pr_cont("during data scrub.\n"); return true; } - return f12h_dc_mce(ec); + return f12h_dc_mce(ec, xec); } -static bool k8_dc_mce(u16 ec) +static bool k8_dc_mce(u16 ec, u8 xec) { if (BUS_ERROR(ec)) { pr_cont("during system linefill.\n"); return true; } - return f10h_dc_mce(ec); + return f10h_dc_mce(ec, xec); } -static bool f14h_dc_mce(u16 ec) +static bool f14h_dc_mce(u16 ec, u8 xec) { u8 r4 = (ec >> 4) & 0xf; u8 ll = ec & 0x3; @@ -170,6 +170,54 @@ static bool f14h_dc_mce(u16 ec) return ret; } +static bool f15h_dc_mce(u16 ec, u8 xec) +{ + bool ret = true; + + if (MEM_ERROR(ec)) { + + switch (xec) { + case 0x0: + pr_cont("Data Array access error.\n"); + break; + + case 0x1: + pr_cont("UC error during a linefill from L2/NB.\n"); + break; + + case 0x2: + case 0x11: + pr_cont("STQ access error.\n"); + break; + + case 0x3: + pr_cont("SCB access error.\n"); + break; + + case 0x10: + pr_cont("Tag error.\n"); + break; + + case 0x12: + pr_cont("LDQ access error.\n"); + break; + + default: + ret = false; + } + } else if (BUS_ERROR(ec)) { + + if (!xec) + pr_cont("during system linefill.\n"); + else + pr_cont(" Internal %s condition.\n", + ((xec == 1) ? "livelock" : "deadlock")); + } else + ret = false; + + return ret; +} + static void amd_decode_dc_mce(struct mce *m) { u16 ec = m->status & 0xffff; @@ -183,20 +231,14 @@ static void amd_decode_dc_mce(struct mce *m) if (tt == TT_DATA) { pr_cont("%s TLB %s.\n", LL_MSG(ec), - (xec ? "multimatch" : "parity error")); + ((xec == 2) ? "locked miss" + : (xec ? "multimatch" : "parity"))); return; } - else - goto wrong_dc_mce; - } - - if (!fam_ops->dc_mce(ec)) - goto wrong_dc_mce; - - return; - -wrong_dc_mce: - pr_emerg(HW_ERR "Corrupted DC MCE info?\n"); + } else if (fam_ops->dc_mce(ec, xec)) + ; + else + pr_emerg(HW_ERR "Corrupted DC MCE info?\n"); } static bool k8_ic_mce(u16 ec) @@ -654,6 +696,7 @@ static int __init mce_amd_init(void) case 0x15: xec_mask = 0x1f; + fam_ops->dc_mce = f15h_dc_mce; break; default: diff --git a/drivers/edac/mce_amd.h b/drivers/edac/mce_amd.h index 35f6e0e3b297..c12394db9bdf 100644 --- a/drivers/edac/mce_amd.h +++ b/drivers/edac/mce_amd.h @@ -100,7 +100,7 @@ struct err_regs { * per-family decoder ops */ struct amd_decoder_ops { - bool (*dc_mce)(u16); + bool (*dc_mce)(u16, u8); bool (*ic_mce)(u16); bool (*nb_mce)(u16, u8); }; -- cgit v1.2.3 From 86039cd401e1780573733870f9c0bd458fc96ea2 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 8 Nov 2010 15:03:35 +0100 Subject: EDAC, MCE: Add F15h IC MCE decoder Add support for decoding F15h IC MCEs. Signed-off-by: Borislav Petkov --- drivers/edac/mce_amd.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++--- drivers/edac/mce_amd.h | 2 +- 2 files changed, 51 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 12bae3b18e3d..158cd5fa2146 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -75,6 +75,26 @@ static const char *f10h_nb_mce_desc[] = { "ECC Error in the Probe Filter directory" }; +static const char * const f15h_ic_mce_desc[] = { + "UC during a demand linefill from L2", + "Parity error during data load from IC", + "Parity error for IC valid bit", + "Main tag parity error", + "Parity error in prediction queue", + "PFB data/address parity error", + "Parity error in the branch status reg", + "PFB promotion address error", + "Tag error during probe/victimization", + "Parity error for IC probe tag valid bit", + "PFB non-cacheable bit parity error", + "PFB valid bit parity error", /* xec = 0xd */ + "patch RAM", /* xec = 010 */ + "uop queue", + "insn buffer", + "predecode buffer", + "fetch address FIFO" +}; + static bool f12h_dc_mce(u16 ec, u8 xec) { bool ret = false; @@ -241,7 +261,7 @@ static void amd_decode_dc_mce(struct mce *m) pr_emerg(HW_ERR "Corrupted DC MCE info?\n"); } -static bool k8_ic_mce(u16 ec) +static bool k8_ic_mce(u16 ec, u8 xec) { u8 ll = ec & 0x3; u8 r4 = (ec >> 4) & 0xf; @@ -276,7 +296,7 @@ static bool k8_ic_mce(u16 ec) return ret; } -static bool f14h_ic_mce(u16 ec) +static bool f14h_ic_mce(u16 ec, u8 xec) { u8 ll = ec & 0x3; u8 tt = (ec >> 2) & 0x3; @@ -297,6 +317,32 @@ static bool f14h_ic_mce(u16 ec) return ret; } +static bool f15h_ic_mce(u16 ec, u8 xec) +{ + bool ret = true; + + if (!MEM_ERROR(ec)) + return false; + + switch (xec) { + case 0x0 ... 0xa: + pr_cont("%s.\n", f15h_ic_mce_desc[xec]); + break; + + case 0xd: + pr_cont("%s.\n", f15h_ic_mce_desc[xec-2]); + break; + + case 0x10 ... 0x14: + pr_cont("Decoder %s parity error.\n", f15h_ic_mce_desc[xec-4]); + break; + + default: + ret = false; + } + return ret; +} + static void amd_decode_ic_mce(struct mce *m) { u16 ec = m->status & 0xffff; @@ -311,7 +357,7 @@ static void amd_decode_ic_mce(struct mce *m) bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58))); pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read")); - } else if (fam_ops->ic_mce(ec)) + } else if (fam_ops->ic_mce(ec, xec)) ; else pr_emerg(HW_ERR "Corrupted IC MCE info?\n"); @@ -697,6 +743,7 @@ static int __init mce_amd_init(void) case 0x15: xec_mask = 0x1f; fam_ops->dc_mce = f15h_dc_mce; + fam_ops->ic_mce = f15h_ic_mce; break; default: diff --git a/drivers/edac/mce_amd.h b/drivers/edac/mce_amd.h index c12394db9bdf..83988471123f 100644 --- a/drivers/edac/mce_amd.h +++ b/drivers/edac/mce_amd.h @@ -101,7 +101,7 @@ struct err_regs { */ struct amd_decoder_ops { bool (*dc_mce)(u16, u8); - bool (*ic_mce)(u16); + bool (*ic_mce)(u16, u8); bool (*nb_mce)(u16, u8); }; -- cgit v1.2.3 From 70fdb494aa8c82f76745d5a32b8abc505813557c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 21 Sep 2010 20:45:10 +0200 Subject: EDAC, MCE: Add F15h CU MCE decoder MCE bank 2 is redefined from a BU to a CU (Combined Unit) bank on F15h. Add a decoder function for CU MCEs. Signed-off-by: Borislav Petkov --- drivers/edac/mce_amd.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 158cd5fa2146..7eb27062f2ef 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -95,6 +95,23 @@ static const char * const f15h_ic_mce_desc[] = { "fetch address FIFO" }; +static const char * const f15h_cu_mce_desc[] = { + "Fill ECC error on data fills", /* xec = 0x4 */ + "Fill parity error on insn fills", + "Prefetcher request FIFO parity error", + "PRQ address parity error", + "PRQ data parity error", + "WCC Tag ECC error", + "WCC Data ECC error", + "WCB Data parity error", + "VB Data/ECC error", + "L2 Tag ECC error", /* xec = 0x10 */ + "Hard L2 Tag ECC error", + "Multiple hits on L2 tag", + "XAB parity error", + "PRB address parity error" +}; + static bool f12h_dc_mce(u16 ec, u8 xec) { bool ret = false; @@ -405,6 +422,46 @@ wrong_bu_mce: pr_emerg(HW_ERR "Corrupted BU MCE info?\n"); } +static void amd_decode_cu_mce(struct mce *m) +{ + u16 ec = m->status & 0xffff; + u8 xec = (m->status >> 16) & xec_mask; + + pr_emerg(HW_ERR "Combined Unit Error: "); + + if (TLB_ERROR(ec)) { + if (xec == 0x0) + pr_cont("Data parity TLB read error.\n"); + else if (xec == 0x1) + pr_cont("Poison data provided for TLB fill.\n"); + else + goto wrong_cu_mce; + } else if (BUS_ERROR(ec)) { + if (xec > 2) + goto wrong_cu_mce; + + pr_cont("Error during attempted NB data read.\n"); + } else if (MEM_ERROR(ec)) { + switch (xec) { + case 0x4 ... 0xc: + pr_cont("%s.\n", f15h_cu_mce_desc[xec - 0x4]); + break; + + case 0x10 ... 0x14: + pr_cont("%s.\n", f15h_cu_mce_desc[xec - 0x7]); + break; + + default: + goto wrong_cu_mce; + } + } + + return; + +wrong_cu_mce: + pr_emerg(HW_ERR "Corrupted CU MCE info?\n"); +} + static void amd_decode_ls_mce(struct mce *m) { u16 ec = m->status & 0xffff; @@ -665,7 +722,10 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) break; case 2: - amd_decode_bu_mce(m); + if (boot_cpu_data.x86 == 0x15) + amd_decode_cu_mce(m); + else + amd_decode_bu_mce(m); break; case 3: -- cgit v1.2.3 From b18434cad1740466f7a1c304ea4af0f4d3c874f1 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 22 Sep 2010 11:53:32 +0200 Subject: EDAC, MCE: No F15h LS MCE decoder F15h BD doesn't generate LS MCEs so warn about it. Signed-off-by: Borislav Petkov --- drivers/edac/mce_amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 7eb27062f2ef..44e6cdaff663 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -467,7 +467,7 @@ static void amd_decode_ls_mce(struct mce *m) u16 ec = m->status & 0xffff; u8 xec = (m->status >> 16) & xec_mask; - if (boot_cpu_data.x86 == 0x14) { + if (boot_cpu_data.x86 >= 0x14) { pr_emerg("You shouldn't be seeing an LS MCE on this cpu family," " please report on LKML.\n"); return; -- cgit v1.2.3 From 05cd667d668eb08845dd49c02130e5223121b715 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 22 Sep 2010 15:06:24 +0200 Subject: EDAC, MCE: Add an F15h NB MCE decoder by (almost) reusing the F10h one since the signatures are the same. Signed-off-by: Borislav Petkov --- drivers/edac/mce_amd.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers') diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 44e6cdaff663..efbcb5389e80 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -556,6 +556,15 @@ static bool f10h_nb_mce(u16 ec, u8 xec) goto out; break; + case 0x19: + if (boot_cpu_data.x86 == 0x15) + pr_cont("Compute Unit Data Error.\n"); + else + ret = false; + + goto out; + break; + case 0x1c ... 0x1f: offset = 24; break; @@ -804,6 +813,7 @@ static int __init mce_amd_init(void) xec_mask = 0x1f; fam_ops->dc_mce = f15h_dc_mce; fam_ops->ic_mce = f15h_ic_mce; + fam_ops->nb_mce = f10h_nb_mce; break; default: -- cgit v1.2.3 From 8259a7e5724c42c89d927b92cda3e0ab15b9ade9 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 22 Sep 2010 15:28:59 +0200 Subject: EDAC, MCE: Add F15 EX MCE decoder Integrate the single FIROB signature into an expanded table along with the new BD MCE types. Signed-off-by: Borislav Petkov --- drivers/edac/mce_amd.c | 41 ++++++++++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index efbcb5389e80..afda6de5d7e3 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -112,6 +112,22 @@ static const char * const f15h_cu_mce_desc[] = { "PRB address parity error" }; +static const char * const fr_ex_mce_desc[] = { + "CPU Watchdog timer expire", + "Wakeup array dest tag", + "AG payload array", + "EX payload array", + "IDRF array", + "Retire dispatch queue", + "Mapper checkpoint array", + "Physical register file EX0 port", + "Physical register file EX1 port", + "Physical register file AG0 port", + "Physical register file AG1 port", + "Flag register file", + "DE correctable error could not be corrected" +}; + static bool f12h_dc_mce(u16 ec, u8 xec) { bool ret = false; @@ -651,15 +667,26 @@ EXPORT_SYMBOL_GPL(amd_decode_nb_mce); static void amd_decode_fr_mce(struct mce *m) { - if (boot_cpu_data.x86 == 0xf || - boot_cpu_data.x86 == 0x11) + struct cpuinfo_x86 *c = &boot_cpu_data; + u8 xec = (m->status >> 16) & xec_mask; + + if (c->x86 == 0xf || c->x86 == 0x11) goto wrong_fr_mce; - /* we have only one error signature so match all fields at once. */ - if ((m->status & 0xffff) == 0x0f0f) { - pr_emerg(HW_ERR "FR Error: CPU Watchdog timer expire.\n"); - return; - } + if (c->x86 != 0x15 && xec != 0x0) + goto wrong_fr_mce; + + pr_emerg(HW_ERR "%s Error: ", + (c->x86 == 0x15 ? "Execution Unit" : "FIROB")); + + if (xec == 0x0 || xec == 0xc) + pr_cont("%s.\n", fr_ex_mce_desc[xec]); + else if (xec < 0xd) + pr_cont("%s parity error.\n", fr_ex_mce_desc[xec]); + else + goto wrong_fr_mce; + + return; wrong_fr_mce: pr_emerg(HW_ERR "Corrupted FR MCE info?\n"); -- cgit v1.2.3 From b8f85c477bdf1fec98ea7cbe952fdb5f40eb0aa7 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 22 Sep 2010 15:37:58 +0200 Subject: EDAC, MCE: Add F15h FP MCE decoder Add decoder for FP MCEs. Signed-off-by: Borislav Petkov --- drivers/edac/mce_amd.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'drivers') diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index afda6de5d7e3..c14abe3e4074 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -692,6 +692,46 @@ wrong_fr_mce: pr_emerg(HW_ERR "Corrupted FR MCE info?\n"); } +static void amd_decode_fp_mce(struct mce *m) +{ + u8 xec = (m->status >> 16) & xec_mask; + + pr_emerg(HW_ERR "Floating Point Unit Error: "); + + switch (xec) { + case 0x1: + pr_cont("Free List"); + break; + + case 0x2: + pr_cont("Physical Register File"); + break; + + case 0x3: + pr_cont("Retire Queue"); + break; + + case 0x4: + pr_cont("Scheduler table"); + break; + + case 0x5: + pr_cont("Status Register File"); + break; + + default: + goto wrong_fp_mce; + break; + } + + pr_cont(" parity error.\n"); + + return; + +wrong_fp_mce: + pr_emerg(HW_ERR "Corrupted FP MCE info?\n"); +} + static inline void amd_decode_err_code(u16 ec) { if (TLB_ERROR(ec)) { @@ -777,6 +817,10 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) amd_decode_fr_mce(m); break; + case 6: + amd_decode_fp_mce(m); + break; + default: break; } -- cgit v1.2.3 From 6245288232516aadf293f575d1812dafb4696aee Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 22 Sep 2010 16:08:37 +0200 Subject: EDAC, MCE: Overhaul error fields extraction macros Make macro names shorter thus making code shorter and more clear. Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 4 +-- drivers/edac/mce_amd.c | 83 ++++++++++++++++++++--------------------------- drivers/edac/mce_amd.h | 10 +++--- 3 files changed, 43 insertions(+), 54 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index eca9ba193e94..0212232e1597 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2055,8 +2055,8 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci, struct err_regs *info) { - u32 ec = ERROR_CODE(info->nbsl); - u32 xec = EXT_ERROR_CODE(info->nbsl); + u16 ec = EC(info->nbsl); + u8 xec = XEC(info->nbsl, 0x1f); int ecc_type = (info->nbsh >> 13) & 0x3; /* Bail early out if this was an 'observed' error */ diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index c14abe3e4074..53d4dc0de343 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -133,13 +133,13 @@ static bool f12h_dc_mce(u16 ec, u8 xec) bool ret = false; if (MEM_ERROR(ec)) { - u8 ll = ec & 0x3; + u8 ll = LL(ec); ret = true; if (ll == LL_L2) pr_cont("during L1 linefill from L2.\n"); else if (ll == LL_L1) - pr_cont("Data/Tag %s error.\n", RRRR_MSG(ec)); + pr_cont("Data/Tag %s error.\n", R4_MSG(ec)); else ret = false; } @@ -148,10 +148,7 @@ static bool f12h_dc_mce(u16 ec, u8 xec) static bool f10h_dc_mce(u16 ec, u8 xec) { - u8 r4 = (ec >> 4) & 0xf; - u8 ll = ec & 0x3; - - if (r4 == R4_GEN && ll == LL_L1) { + if (R4(ec) == R4_GEN && LL(ec) == LL_L1) { pr_cont("during data scrub.\n"); return true; } @@ -170,15 +167,12 @@ static bool k8_dc_mce(u16 ec, u8 xec) static bool f14h_dc_mce(u16 ec, u8 xec) { - u8 r4 = (ec >> 4) & 0xf; - u8 ll = ec & 0x3; - u8 tt = (ec >> 2) & 0x3; - u8 ii = tt; + u8 r4 = R4(ec); bool ret = true; if (MEM_ERROR(ec)) { - if (tt != TT_DATA || ll != LL_L1) + if (TT(ec) != TT_DATA || LL(ec) != LL_L1) return false; switch (r4) { @@ -198,7 +192,7 @@ static bool f14h_dc_mce(u16 ec, u8 xec) } } else if (BUS_ERROR(ec)) { - if ((ii != II_MEM && ii != II_IO) || ll != LL_LG) + if ((II(ec) != II_MEM && II(ec) != II_IO) || LL(ec) != LL_LG) return false; pr_cont("System read data error on a "); @@ -273,16 +267,14 @@ static bool f15h_dc_mce(u16 ec, u8 xec) static void amd_decode_dc_mce(struct mce *m) { - u16 ec = m->status & 0xffff; - u8 xec = (m->status >> 16) & xec_mask; + u16 ec = EC(m->status); + u8 xec = XEC(m->status, xec_mask); pr_emerg(HW_ERR "Data Cache Error: "); /* TLB error signatures are the same across families */ if (TLB_ERROR(ec)) { - u8 tt = (ec >> 2) & 0x3; - - if (tt == TT_DATA) { + if (TT(ec) == TT_DATA) { pr_cont("%s TLB %s.\n", LL_MSG(ec), ((xec == 2) ? "locked miss" : (xec ? "multimatch" : "parity"))); @@ -296,8 +288,7 @@ static void amd_decode_dc_mce(struct mce *m) static bool k8_ic_mce(u16 ec, u8 xec) { - u8 ll = ec & 0x3; - u8 r4 = (ec >> 4) & 0xf; + u8 ll = LL(ec); bool ret = true; if (!MEM_ERROR(ec)) @@ -306,7 +297,7 @@ static bool k8_ic_mce(u16 ec, u8 xec) if (ll == 0x2) pr_cont("during a linefill from L2.\n"); else if (ll == 0x1) { - switch (r4) { + switch (R4(ec)) { case R4_IRD: pr_cont("Parity error during data load.\n"); break; @@ -331,13 +322,11 @@ static bool k8_ic_mce(u16 ec, u8 xec) static bool f14h_ic_mce(u16 ec, u8 xec) { - u8 ll = ec & 0x3; - u8 tt = (ec >> 2) & 0x3; - u8 r4 = (ec >> 4) & 0xf; + u8 r4 = R4(ec); bool ret = true; if (MEM_ERROR(ec)) { - if (tt != 0 || ll != 1) + if (TT(ec) != 0 || LL(ec) != 1) ret = false; if (r4 == R4_IRD) @@ -378,8 +367,8 @@ static bool f15h_ic_mce(u16 ec, u8 xec) static void amd_decode_ic_mce(struct mce *m) { - u16 ec = m->status & 0xffff; - u8 xec = (m->status >> 16) & xec_mask; + u16 ec = EC(m->status); + u8 xec = XEC(m->status, xec_mask); pr_emerg(HW_ERR "Instruction Cache Error: "); @@ -398,8 +387,8 @@ static void amd_decode_ic_mce(struct mce *m) static void amd_decode_bu_mce(struct mce *m) { - u32 ec = m->status & 0xffff; - u32 xec = (m->status >> 16) & xec_mask; + u16 ec = EC(m->status); + u8 xec = XEC(m->status, xec_mask); pr_emerg(HW_ERR "Bus Unit Error"); @@ -408,23 +397,23 @@ static void amd_decode_bu_mce(struct mce *m) else if (xec == 0x3) pr_cont(" in the victim data buffers.\n"); else if (xec == 0x2 && MEM_ERROR(ec)) - pr_cont(": %s error in the L2 cache tags.\n", RRRR_MSG(ec)); + pr_cont(": %s error in the L2 cache tags.\n", R4_MSG(ec)); else if (xec == 0x0) { if (TLB_ERROR(ec)) pr_cont(": %s error in a Page Descriptor Cache or " "Guest TLB.\n", TT_MSG(ec)); else if (BUS_ERROR(ec)) pr_cont(": %s/ECC error in data read from NB: %s.\n", - RRRR_MSG(ec), PP_MSG(ec)); + R4_MSG(ec), PP_MSG(ec)); else if (MEM_ERROR(ec)) { - u8 rrrr = (ec >> 4) & 0xf; + u8 r4 = R4(ec); - if (rrrr >= 0x7) + if (r4 >= 0x7) pr_cont(": %s error during data copyback.\n", - RRRR_MSG(ec)); - else if (rrrr <= 0x1) + R4_MSG(ec)); + else if (r4 <= 0x1) pr_cont(": %s parity/ECC error during data " - "access from L2.\n", RRRR_MSG(ec)); + "access from L2.\n", R4_MSG(ec)); else goto wrong_bu_mce; } else @@ -440,8 +429,8 @@ wrong_bu_mce: static void amd_decode_cu_mce(struct mce *m) { - u16 ec = m->status & 0xffff; - u8 xec = (m->status >> 16) & xec_mask; + u16 ec = EC(m->status); + u8 xec = XEC(m->status, xec_mask); pr_emerg(HW_ERR "Combined Unit Error: "); @@ -480,8 +469,8 @@ wrong_cu_mce: static void amd_decode_ls_mce(struct mce *m) { - u16 ec = m->status & 0xffff; - u8 xec = (m->status >> 16) & xec_mask; + u16 ec = EC(m->status); + u8 xec = XEC(m->status, xec_mask); if (boot_cpu_data.x86 >= 0x14) { pr_emerg("You shouldn't be seeing an LS MCE on this cpu family," @@ -492,12 +481,12 @@ static void amd_decode_ls_mce(struct mce *m) pr_emerg(HW_ERR "Load Store Error"); if (xec == 0x0) { - u8 r4 = (ec >> 4) & 0xf; + u8 r4 = R4(ec); if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR)) goto wrong_ls_mce; - pr_cont(" during %s.\n", RRRR_MSG(ec)); + pr_cont(" during %s.\n", R4_MSG(ec)); } else goto wrong_ls_mce; @@ -605,8 +594,8 @@ static bool nb_noop_mce(u16 ec, u8 xec) void amd_decode_nb_mce(int node_id, struct mce *m, u32 nbcfg) { - u8 xec = (m->status >> 16) & 0x1f; - u16 ec = m->status & 0xffff; + u16 ec = EC(m->status); + u8 xec = XEC(m->status, 0x1f); u32 nbsh = (u32)(m->status >> 32); pr_emerg(HW_ERR "Northbridge Error, node %d: ", node_id); @@ -668,7 +657,7 @@ EXPORT_SYMBOL_GPL(amd_decode_nb_mce); static void amd_decode_fr_mce(struct mce *m) { struct cpuinfo_x86 *c = &boot_cpu_data; - u8 xec = (m->status >> 16) & xec_mask; + u8 xec = XEC(m->status, xec_mask); if (c->x86 == 0xf || c->x86 == 0x11) goto wrong_fr_mce; @@ -694,7 +683,7 @@ wrong_fr_mce: static void amd_decode_fp_mce(struct mce *m) { - u8 xec = (m->status >> 16) & xec_mask; + u8 xec = XEC(m->status, xec_mask); pr_emerg(HW_ERR "Floating Point Unit Error: "); @@ -739,11 +728,11 @@ static inline void amd_decode_err_code(u16 ec) TT_MSG(ec), LL_MSG(ec)); } else if (MEM_ERROR(ec)) { pr_emerg(HW_ERR "Transaction: %s, Type: %s, Cache Level: %s\n", - RRRR_MSG(ec), TT_MSG(ec), LL_MSG(ec)); + R4_MSG(ec), TT_MSG(ec), LL_MSG(ec)); } else if (BUS_ERROR(ec)) { pr_emerg(HW_ERR "Transaction: %s (%s), %s, Cache Level: %s, " "Participating Processor: %s\n", - RRRR_MSG(ec), II_MSG(ec), TO_MSG(ec), LL_MSG(ec), + R4_MSG(ec), II_MSG(ec), TO_MSG(ec), LL_MSG(ec), PP_MSG(ec)); } else pr_emerg(HW_ERR "Huh? Unknown MCE error 0x%x\n", ec); diff --git a/drivers/edac/mce_amd.h b/drivers/edac/mce_amd.h index 83988471123f..45dda47173f2 100644 --- a/drivers/edac/mce_amd.h +++ b/drivers/edac/mce_amd.h @@ -7,8 +7,8 @@ #define BIT_64(n) (U64_C(1) << (n)) -#define ERROR_CODE(x) ((x) & 0xffff) -#define EXT_ERROR_CODE(x) (((x) >> 16) & 0x1f) +#define EC(x) ((x) & 0xffff) +#define XEC(x, mask) (((x) >> 16) & mask) #define LOW_SYNDROME(x) (((x) >> 15) & 0xff) #define HIGH_SYNDROME(x) (((x) >> 24) & 0xff) @@ -21,15 +21,15 @@ #define TT_MSG(x) tt_msgs[TT(x)] #define II(x) (((x) >> 2) & 0x3) #define II_MSG(x) ii_msgs[II(x)] -#define LL(x) (((x) >> 0) & 0x3) +#define LL(x) ((x) & 0x3) #define LL_MSG(x) ll_msgs[LL(x)] #define TO(x) (((x) >> 8) & 0x1) #define TO_MSG(x) to_msgs[TO(x)] #define PP(x) (((x) >> 9) & 0x3) #define PP_MSG(x) pp_msgs[PP(x)] -#define RRRR(x) (((x) >> 4) & 0xf) -#define RRRR_MSG(x) ((RRRR(x) < 9) ? rrrr_msgs[RRRR(x)] : "Wrong R4!") +#define R4(x) (((x) >> 4) & 0xf) +#define R4_MSG(x) ((R4(x) < 9) ? rrrr_msgs[R4(x)] : "Wrong R4!") #define K8_NBSH 0x4C -- cgit v1.2.3 From fa7ae8cc8c88c0679eab24c5a1b5d3b134a5f542 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 22 Sep 2010 17:42:27 +0200 Subject: EDAC, MCE: Shorten error report formatting Shorten up MCi_STATUS flags and add BD's new deferred and poison types. Also, simplify formatting. Signed-off-by: Borislav Petkov --- drivers/edac/mce_amd.c | 54 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 22 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 53d4dc0de343..0a19d2626686 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -723,19 +723,22 @@ wrong_fp_mce: static inline void amd_decode_err_code(u16 ec) { - if (TLB_ERROR(ec)) { - pr_emerg(HW_ERR "Transaction: %s, Cache Level: %s\n", - TT_MSG(ec), LL_MSG(ec)); - } else if (MEM_ERROR(ec)) { - pr_emerg(HW_ERR "Transaction: %s, Type: %s, Cache Level: %s\n", - R4_MSG(ec), TT_MSG(ec), LL_MSG(ec)); - } else if (BUS_ERROR(ec)) { - pr_emerg(HW_ERR "Transaction: %s (%s), %s, Cache Level: %s, " - "Participating Processor: %s\n", - R4_MSG(ec), II_MSG(ec), TO_MSG(ec), LL_MSG(ec), - PP_MSG(ec)); - } else - pr_emerg(HW_ERR "Huh? Unknown MCE error 0x%x\n", ec); + + pr_emerg(HW_ERR "cache level: %s", LL_MSG(ec)); + + if (BUS_ERROR(ec)) + pr_cont(", mem/io: %s", II_MSG(ec)); + else + pr_cont(", tx: %s", TT_MSG(ec)); + + if (MEM_ERROR(ec) || BUS_ERROR(ec)) { + pr_cont(", mem-tx: %s", R4_MSG(ec)); + + if (BUS_ERROR(ec)) + pr_cont(", part-proc: %s (%s)", PP_MSG(ec), TO_MSG(ec)); + } + + pr_cont("\n"); } /* @@ -757,25 +760,32 @@ static bool amd_filter_mce(struct mce *m) int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) { struct mce *m = (struct mce *)data; + struct cpuinfo_x86 *c = &boot_cpu_data; int node, ecc; if (amd_filter_mce(m)) return NOTIFY_STOP; - pr_emerg(HW_ERR "MC%d_STATUS: ", m->bank); + pr_emerg(HW_ERR "MC%d_STATUS[%s|%s|%s|%s|%s", + m->bank, + ((m->status & MCI_STATUS_OVER) ? "Over" : "-"), + ((m->status & MCI_STATUS_UC) ? "UE" : "CE"), + ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"), + ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"), + ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-")); - pr_cont("%sorrected error, other errors lost: %s, " - "CPU context corrupt: %s", - ((m->status & MCI_STATUS_UC) ? "Unc" : "C"), - ((m->status & MCI_STATUS_OVER) ? "yes" : "no"), - ((m->status & MCI_STATUS_PCC) ? "yes" : "no")); + if (c->x86 == 0x15) + pr_cont("|%s|%s", + ((m->status & BIT(44)) ? "Deferred" : "-"), + ((m->status & BIT(43)) ? "Poison" : "-")); /* do the two bits[14:13] together */ ecc = (m->status >> 45) & 0x3; if (ecc) - pr_cont(", %sECC Error", ((ecc == 2) ? "C" : "U")); + pr_cont("|%sECC", ((ecc == 2) ? "C" : "U")); + + pr_cont("]: 0x%016llx\n", m->status); - pr_cont("\n"); switch (m->bank) { case 0: @@ -787,7 +797,7 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) break; case 2: - if (boot_cpu_data.x86 == 0x15) + if (c->x86 == 0x15) amd_decode_cu_mce(m); else amd_decode_bu_mce(m); -- cgit v1.2.3 From 1b07ca47ff2fbffbe09d8b0a0a25d8747a3cdcae Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 9 Nov 2010 19:41:49 +0100 Subject: EDAC, MCE: Allow F15h bank 6 MCE injection F15h adds a sixth MCE bank: adjust bank number check in the injection code. Signed-off-by: Borislav Petkov --- drivers/edac/mce_amd_inj.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c index 39faded3cadd..733a7e7a8d6f 100644 --- a/drivers/edac/mce_amd_inj.c +++ b/drivers/edac/mce_amd_inj.c @@ -88,10 +88,11 @@ static ssize_t edac_inject_bank_store(struct kobject *kobj, return -EINVAL; } - if (value > 5) { - printk(KERN_ERR "Non-existant MCE bank: %lu\n", value); - return -EINVAL; - } + if (value > 5) + if (boot_cpu_data.x86 != 0x15 || value > 6) { + printk(KERN_ERR "Non-existant MCE bank: %lu\n", value); + return -EINVAL; + } i_mce.bank = value; -- cgit v1.2.3 From bad11e031862294265145d87dd4be1ae4af0d57f Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 22 Sep 2010 17:44:51 +0200 Subject: EDAC, MCE: Enable MCE decoding on F15h Now that everything is inplace, enable MCE decoding on F15h. Make initcall routine a bit more readable. Signed-off-by: Borislav Petkov --- drivers/edac/mce_amd.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 0a19d2626686..59ad34c506a5 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -836,18 +836,21 @@ static struct notifier_block amd_mce_dec_nb = { static int __init mce_amd_init(void) { - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) + struct cpuinfo_x86 *c = &boot_cpu_data; + + if (c->x86_vendor != X86_VENDOR_AMD) return 0; - if ((boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x12) && - (boot_cpu_data.x86 != 0x14 || boot_cpu_data.x86_model > 0xf)) + if ((c->x86 < 0xf || c->x86 > 0x12) && + (c->x86 != 0x14 || c->x86_model > 0xf) && + (c->x86 != 0x15 || c->x86_model > 0xf)) return 0; fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL); if (!fam_ops) return -ENOMEM; - switch (boot_cpu_data.x86) { + switch (c->x86) { case 0xf: fam_ops->dc_mce = k8_dc_mce; fam_ops->ic_mce = k8_ic_mce; @@ -887,8 +890,7 @@ static int __init mce_amd_init(void) break; default: - printk(KERN_WARNING "Huh? What family is that: %d?!\n", - boot_cpu_data.x86); + printk(KERN_WARNING "Huh? What family is that: %d?!\n", c->x86); kfree(fam_ops); return -EINVAL; } -- cgit v1.2.3 From 50adbbd8a8e572ad2533eace228c841ec84028a3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 13 Nov 2010 11:44:26 -0500 Subject: EDAC, MCE: Use BIT_64() to eliminate warnings on 32-bit Building for X86_32 produces shift count warnings, so use BIT_64() to eliminate the warnings. drivers/edac/mce_amd.c:778: warning: left shift count >= width of type drivers/edac/mce_amd.c:778: warning: left shift count >= width of type Signed-off-by: Randy Dunlap Cc: Doug Thompson Cc: bluesmoke-devel@lists.sourceforge.net Signed-off-by: Borislav Petkov --- drivers/edac/mce_amd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 59ad34c506a5..61e0e87d1841 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -776,8 +776,8 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) if (c->x86 == 0x15) pr_cont("|%s|%s", - ((m->status & BIT(44)) ? "Deferred" : "-"), - ((m->status & BIT(43)) ? "Poison" : "-")); + ((m->status & BIT_64(44)) ? "Deferred" : "-"), + ((m->status & BIT_64(43)) ? "Poison" : "-")); /* do the two bits[14:13] together */ ecc = (m->status >> 45) & 0x3; -- cgit v1.2.3 From 6d5db4668796d903dc3bad2852c82073509c37d2 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 25 Nov 2010 15:40:27 +0100 Subject: EDAC, MCE: Fix NB error formatting Minor formatting fixup since the information which core was associated with the MCE is not always valid. Signed-off-by: Borislav Petkov --- drivers/edac/mce_amd.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 61e0e87d1841..f6cf73d93359 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -597,24 +597,27 @@ void amd_decode_nb_mce(int node_id, struct mce *m, u32 nbcfg) u16 ec = EC(m->status); u8 xec = XEC(m->status, 0x1f); u32 nbsh = (u32)(m->status >> 32); + int core = -1; - pr_emerg(HW_ERR "Northbridge Error, node %d: ", node_id); + pr_emerg(HW_ERR "Northbridge Error (node %d", node_id); - /* - * F10h, revD can disable ErrCpu[3:0] so check that first and also the - * value encoding has changed so interpret those differently - */ + /* F10h, revD can disable ErrCpu[3:0] through ErrCpuVal */ if ((boot_cpu_data.x86 == 0x10) && (boot_cpu_data.x86_model > 7)) { if (nbsh & K8_NBSH_ERR_CPU_VAL) - pr_cont(", core: %u", (u8)(nbsh & nb_err_cpumask)); + core = nbsh & nb_err_cpumask; } else { u8 assoc_cpus = nbsh & nb_err_cpumask; if (assoc_cpus > 0) - pr_cont(", core: %d", fls(assoc_cpus) - 1); + core = fls(assoc_cpus) - 1; } + if (core >= 0) + pr_cont(", core %d): ", core); + else + pr_cont("): "); + switch (xec) { case 0x2: pr_cont("Sync error (sync packets on HT link detected).\n"); -- cgit v1.2.3