From 89a76171bf50bd20d44338408b8c09433c302956 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Fri, 10 Jan 2020 01:56:47 +0000 Subject: x86/MCE/AMD, EDAC/mce_amd: Add new Load Store unit McaType Add support for a new version of the Load Store unit bank type as indicated by its McaType value, which will be present in future SMCA systems. Add the new (HWID, MCATYPE) tuple. Reuse the same name, since this is logically the same to the user. Also, add the new error descriptions to edac_mce_amd. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200110015651.14887-2-Yazen.Ghannam@amd.com --- drivers/edac/mce_amd.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'drivers/edac') diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index ea622c6f3a39..aa6ea53f7d14 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -175,6 +175,33 @@ static const char * const smca_ls_mce_desc[] = { "L2 Fill Data error", }; +static const char * const smca_ls2_mce_desc[] = { + "An ECC error was detected on a data cache read by a probe or victimization", + "An ECC error or L2 poison was detected on a data cache read by a load", + "An ECC error was detected on a data cache read-modify-write by a store", + "An ECC error or poison bit mismatch was detected on a tag read by a probe or victimization", + "An ECC error or poison bit mismatch was detected on a tag read by a load", + "An ECC error or poison bit mismatch was detected on a tag read by a store", + "An ECC error was detected on an EMEM read by a load", + "An ECC error was detected on an EMEM read-modify-write by a store", + "A parity error was detected in an L1 TLB entry by any access", + "A parity error was detected in an L2 TLB entry by any access", + "A parity error was detected in a PWC entry by any access", + "A parity error was detected in an STQ entry by any access", + "A parity error was detected in an LDQ entry by any access", + "A parity error was detected in a MAB entry by any access", + "A parity error was detected in an SCB entry state field by any access", + "A parity error was detected in an SCB entry address field by any access", + "A parity error was detected in an SCB entry data field by any access", + "A parity error was detected in a WCB entry by any access", + "A poisoned line was detected in an SCB entry by any access", + "A SystemReadDataError error was reported on read data returned from L2 for a load", + "A SystemReadDataError error was reported on read data returned from L2 for an SCB store", + "A SystemReadDataError error was reported on read data returned from L2 for a WCB store", + "A hardware assertion error was reported", + "A parity error was detected in an STLF, SCB EMEM entry or SRB store data by any access", +}; + static const char * const smca_if_mce_desc[] = { "Op Cache Microtag Probe Port Parity Error", "IC Microtag or Full Tag Multi-hit Error", @@ -378,6 +405,7 @@ struct smca_mce_desc { static struct smca_mce_desc smca_mce_descs[] = { [SMCA_LS] = { smca_ls_mce_desc, ARRAY_SIZE(smca_ls_mce_desc) }, + [SMCA_LS_V2] = { smca_ls2_mce_desc, ARRAY_SIZE(smca_ls2_mce_desc) }, [SMCA_IF] = { smca_if_mce_desc, ARRAY_SIZE(smca_if_mce_desc) }, [SMCA_L2_CACHE] = { smca_l2_mce_desc, ARRAY_SIZE(smca_l2_mce_desc) }, [SMCA_DE] = { smca_de_mce_desc, ARRAY_SIZE(smca_de_mce_desc) }, -- cgit v1.2.3 From 9f6aef86315ac31481a288ba1b3f43b2aac93757 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Fri, 10 Jan 2020 01:56:48 +0000 Subject: EDAC/mce_amd: Always load on SMCA systems MCA error decoding on SMCA systems is not dependent on family. Return success early if the system supports the SMCA feature. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200110015651.14887-3-Yazen.Ghannam@amd.com --- drivers/edac/mce_amd.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'drivers/edac') diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index aa6ea53f7d14..524c63fdad42 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -1189,6 +1189,11 @@ static int __init mce_amd_init(void) if (!fam_ops) return -ENOMEM; + if (boot_cpu_has(X86_FEATURE_SMCA)) { + xec_mask = 0x3f; + goto out; + } + switch (c->x86) { case 0xf: fam_ops->mc0_mce = k8_mc0_mce; @@ -1237,11 +1242,8 @@ static int __init mce_amd_init(void) case 0x17: case 0x18: - xec_mask = 0x3f; - if (!boot_cpu_has(X86_FEATURE_SMCA)) { - printk(KERN_WARNING "Decoding supported only on Scalable MCA processors.\n"); - goto err_out; - } + pr_warn("Decoding supported only on Scalable MCA processors.\n"); + goto err_out; break; default: @@ -1249,6 +1251,7 @@ static int __init mce_amd_init(void) goto err_out; } +out: pr_info("MCE: In-kernel MCE decoding enabled.\n"); mce_register_decode_chain(&amd_mce_dec_nb); -- cgit v1.2.3 From 2eb61c91c3e2738218e55f2eaf7e78a4435c233d Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Fri, 10 Jan 2020 01:56:50 +0000 Subject: EDAC/amd64: Add family ops for Family 19h Models 00h-0Fh Add family ops to support AMD Family 19h systems. Existing Family 17h functions can be used. Also, add Family 19h to the list of families to automatically load the module. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200110015651.14887-5-Yazen.Ghannam@amd.com --- drivers/edac/amd64_edac.c | 17 +++++++++++++++++ drivers/edac/amd64_edac.h | 3 +++ 2 files changed, 20 insertions(+) (limited to 'drivers/edac') diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 428ce98f6776..2488cbf76814 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2336,6 +2336,16 @@ static struct amd64_family_type family_types[] = { .dbam_to_cs = f17_addr_mask_to_cs_size, } }, + [F19_CPUS] = { + .ctl_name = "F19h", + .f0_id = PCI_DEVICE_ID_AMD_19H_DF_F0, + .f6_id = PCI_DEVICE_ID_AMD_19H_DF_F6, + .max_mcs = 8, + .ops = { + .early_channel_count = f17_early_channel_count, + .dbam_to_cs = f17_addr_mask_to_cs_size, + } + }, }; /* @@ -3368,6 +3378,12 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) family_types[F17_CPUS].ctl_name = "F18h"; break; + case 0x19: + fam_type = &family_types[F19_CPUS]; + pvt->ops = &family_types[F19_CPUS].ops; + family_types[F19_CPUS].ctl_name = "F19h"; + break; + default: amd64_err("Unsupported family!\n"); return NULL; @@ -3626,6 +3642,7 @@ static const struct x86_cpu_id amd64_cpuids[] = { { X86_VENDOR_AMD, 0x16, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, { X86_VENDOR_AMD, 0x17, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, { X86_VENDOR_HYGON, 0x18, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, + { X86_VENDOR_AMD, 0x19, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, { } }; MODULE_DEVICE_TABLE(x86cpu, amd64_cpuids); diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 9be31688110b..abbf3c274d74 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -122,6 +122,8 @@ #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F6 0x1496 #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F0 0x1440 #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F6 0x1446 +#define PCI_DEVICE_ID_AMD_19H_DF_F0 0x1650 +#define PCI_DEVICE_ID_AMD_19H_DF_F6 0x1656 /* * Function 1 - Address Map @@ -292,6 +294,7 @@ enum amd_families { F17_M10H_CPUS, F17_M30H_CPUS, F17_M70H_CPUS, + F19_CPUS, NUM_FAMILIES, }; -- cgit v1.2.3 From dcd01394ce7cd7d25bb15c81ad2e804d8090611f Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Fri, 10 Jan 2020 01:56:51 +0000 Subject: EDAC/amd64: Drop some family checks for newer systems In general, "pvt->umc != NULL" is used to check if the system is Family 17h+. However, there are a few places that are using direct family checks. Replace the remaining family checks with a check for "pvt->umc != NULL". Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200110015651.14887-6-Yazen.Ghannam@amd.com --- drivers/edac/amd64_edac.c | 45 +++++++++++++++++++-------------------------- 1 file changed, 19 insertions(+), 26 deletions(-) (limited to 'drivers/edac') diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 2488cbf76814..4fc9f0b06ed7 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -214,7 +214,7 @@ static int __set_scrub_rate(struct amd64_pvt *pvt, u32 new_bw, u32 min_rate) scrubval = scrubrates[i].scrubval; - if (pvt->fam == 0x17 || pvt->fam == 0x18) { + if (pvt->umc) { __f17h_set_scrubval(pvt, scrubval); } else if (pvt->fam == 0x15 && pvt->model == 0x60) { f15h_select_dct(pvt, 0); @@ -256,18 +256,7 @@ static int get_scrub_rate(struct mem_ctl_info *mci) int i, retval = -EINVAL; u32 scrubval = 0; - switch (pvt->fam) { - case 0x15: - /* Erratum #505 */ - if (pvt->model < 0x10) - f15h_select_dct(pvt, 0); - - if (pvt->model == 0x60) - amd64_read_pci_cfg(pvt->F2, F15H_M60H_SCRCTRL, &scrubval); - break; - - case 0x17: - case 0x18: + if (pvt->umc) { amd64_read_pci_cfg(pvt->F6, F17H_SCR_BASE_ADDR, &scrubval); if (scrubval & BIT(0)) { amd64_read_pci_cfg(pvt->F6, F17H_SCR_LIMIT_ADDR, &scrubval); @@ -276,11 +265,15 @@ static int get_scrub_rate(struct mem_ctl_info *mci) } else { scrubval = 0; } - break; + } else if (pvt->fam == 0x15) { + /* Erratum #505 */ + if (pvt->model < 0x10) + f15h_select_dct(pvt, 0); - default: + if (pvt->model == 0x60) + amd64_read_pci_cfg(pvt->F2, F15H_M60H_SCRCTRL, &scrubval); + } else { amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval); - break; } scrubval = scrubval & 0x001F; @@ -1055,6 +1048,16 @@ static void determine_memory_type(struct amd64_pvt *pvt) { u32 dram_ctrl, dcsm; + if (pvt->umc) { + if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5)) + pvt->dram_type = MEM_LRDDR4; + else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4)) + pvt->dram_type = MEM_RDDR4; + else + pvt->dram_type = MEM_DDR4; + return; + } + switch (pvt->fam) { case 0xf: if (pvt->ext_model >= K8_REV_F) @@ -1100,16 +1103,6 @@ static void determine_memory_type(struct amd64_pvt *pvt) case 0x16: goto ddr3; - case 0x17: - case 0x18: - if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5)) - pvt->dram_type = MEM_LRDDR4; - else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4)) - pvt->dram_type = MEM_RDDR4; - else - pvt->dram_type = MEM_DDR4; - return; - default: WARN(1, KERN_ERR "%s: Family??? 0x%x\n", __func__, pvt->fam); pvt->dram_type = MEM_EMPTY; -- cgit v1.2.3 From 86e9f9d60eb5e0c5d99ddf6b79f4d308d6453bd0 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 16 Jan 2020 17:28:39 +0100 Subject: EDAC/mce_amd: Make fam_ops static global ... and do not kmalloc a three-pointer struct. Which simplifies mce_amd_init() a bit. No functional changes. Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200116163403.GF27148@zn.tnic --- drivers/edac/mce_amd.c | 68 +++++++++++++++++++++----------------------------- 1 file changed, 29 insertions(+), 39 deletions(-) (limited to 'drivers/edac') diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 524c63fdad42..ea980c556f2e 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -6,7 +6,7 @@ #include "mce_amd.h" -static struct amd_decoder_ops *fam_ops; +static struct amd_decoder_ops fam_ops; static u8 xec_mask = 0xf; @@ -583,7 +583,7 @@ static void decode_mc0_mce(struct mce *m) : (xec ? "multimatch" : "parity"))); return; } - } else if (fam_ops->mc0_mce(ec, xec)) + } else if (fam_ops.mc0_mce(ec, xec)) ; else pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n"); @@ -697,7 +697,7 @@ static void decode_mc1_mce(struct mce *m) pr_cont("Hardware Assert.\n"); else goto wrong_mc1_mce; - } else if (fam_ops->mc1_mce(ec, xec)) + } else if (fam_ops.mc1_mce(ec, xec)) ; else goto wrong_mc1_mce; @@ -831,7 +831,7 @@ static void decode_mc2_mce(struct mce *m) pr_emerg(HW_ERR "MC2 Error: "); - if (!fam_ops->mc2_mce(ec, xec)) + if (!fam_ops.mc2_mce(ec, xec)) pr_cont(HW_ERR "Corrupted MC2 MCE info?\n"); } @@ -1130,7 +1130,8 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) if (m->tsc) pr_emerg(HW_ERR "TSC: %llu\n", m->tsc); - if (!fam_ops) + /* Doesn't matter which member to test. */ + if (!fam_ops.mc0_mce) goto err_code; switch (m->bank) { @@ -1185,10 +1186,6 @@ static int __init mce_amd_init(void) c->x86_vendor != X86_VENDOR_HYGON) return -ENODEV; - fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL); - if (!fam_ops) - return -ENOMEM; - if (boot_cpu_has(X86_FEATURE_SMCA)) { xec_mask = 0x3f; goto out; @@ -1196,59 +1193,58 @@ static int __init mce_amd_init(void) switch (c->x86) { case 0xf: - fam_ops->mc0_mce = k8_mc0_mce; - fam_ops->mc1_mce = k8_mc1_mce; - fam_ops->mc2_mce = k8_mc2_mce; + fam_ops.mc0_mce = k8_mc0_mce; + fam_ops.mc1_mce = k8_mc1_mce; + fam_ops.mc2_mce = k8_mc2_mce; break; case 0x10: - fam_ops->mc0_mce = f10h_mc0_mce; - fam_ops->mc1_mce = k8_mc1_mce; - fam_ops->mc2_mce = k8_mc2_mce; + fam_ops.mc0_mce = f10h_mc0_mce; + fam_ops.mc1_mce = k8_mc1_mce; + fam_ops.mc2_mce = k8_mc2_mce; break; case 0x11: - fam_ops->mc0_mce = k8_mc0_mce; - fam_ops->mc1_mce = k8_mc1_mce; - fam_ops->mc2_mce = k8_mc2_mce; + fam_ops.mc0_mce = k8_mc0_mce; + fam_ops.mc1_mce = k8_mc1_mce; + fam_ops.mc2_mce = k8_mc2_mce; break; case 0x12: - fam_ops->mc0_mce = f12h_mc0_mce; - fam_ops->mc1_mce = k8_mc1_mce; - fam_ops->mc2_mce = k8_mc2_mce; + fam_ops.mc0_mce = f12h_mc0_mce; + fam_ops.mc1_mce = k8_mc1_mce; + fam_ops.mc2_mce = k8_mc2_mce; break; case 0x14: - fam_ops->mc0_mce = cat_mc0_mce; - fam_ops->mc1_mce = cat_mc1_mce; - fam_ops->mc2_mce = k8_mc2_mce; + fam_ops.mc0_mce = cat_mc0_mce; + fam_ops.mc1_mce = cat_mc1_mce; + fam_ops.mc2_mce = k8_mc2_mce; break; case 0x15: xec_mask = c->x86_model == 0x60 ? 0x3f : 0x1f; - fam_ops->mc0_mce = f15h_mc0_mce; - fam_ops->mc1_mce = f15h_mc1_mce; - fam_ops->mc2_mce = f15h_mc2_mce; + fam_ops.mc0_mce = f15h_mc0_mce; + fam_ops.mc1_mce = f15h_mc1_mce; + fam_ops.mc2_mce = f15h_mc2_mce; break; case 0x16: xec_mask = 0x1f; - fam_ops->mc0_mce = cat_mc0_mce; - fam_ops->mc1_mce = cat_mc1_mce; - fam_ops->mc2_mce = f16h_mc2_mce; + fam_ops.mc0_mce = cat_mc0_mce; + fam_ops.mc1_mce = cat_mc1_mce; + fam_ops.mc2_mce = f16h_mc2_mce; break; case 0x17: case 0x18: pr_warn("Decoding supported only on Scalable MCA processors.\n"); - goto err_out; - break; + return -EINVAL; default: printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86); - goto err_out; + return -EINVAL; } out: @@ -1257,11 +1253,6 @@ out: mce_register_decode_chain(&amd_mce_dec_nb); return 0; - -err_out: - kfree(fam_ops); - fam_ops = NULL; - return -EINVAL; } early_initcall(mce_amd_init); @@ -1269,7 +1260,6 @@ early_initcall(mce_amd_init); static void __exit mce_amd_exit(void) { mce_unregister_decode_chain(&amd_mce_dec_nb); - kfree(fam_ops); } MODULE_DESCRIPTION("AMD MCE decoder"); -- cgit v1.2.3