diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-06 19:53:11 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-06 19:53:11 -0700 |
commit | 275b103a26e218b3d739e5ab15be6b40303a1428 (patch) | |
tree | ba27f2d4fa77115201acc492dd482009228fefd0 /drivers/edac | |
parent | 4dd2ab9a0f84a446c65ff33c95339f1cd0e21a4b (diff) | |
parent | 8de9930a4618811edfaebc4981a9fafff2af9170 (diff) | |
download | linux-275b103a26e218b3d739e5ab15be6b40303a1428.tar.bz2 |
Merge tag 'edac_for_5.2' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp
Pull EDAC updates from Borislav Petkov:
- amd64_edac: Family 0x17, models 0x30-.. enablement (Yazen Ghannam)
- skx_*: Librarize it so that it can be shared between drivers (Qiuxu Zhuo)
- altera: Stratix10 improvements (Thor Thayer)
- The usual round of fixes, fixlets and cleanups
* tag 'edac_for_5.2' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp:
Revert "EDAC/amd64: Support more than two controllers for chip select handling"
arm64: dts: stratix10: Use new Stratix10 EDAC bindings
Documentation: dt: edac: Add Stratix10 Peripheral bindings
Documentation: dt: edac: Fix Stratix10 IRQ bindings
EDAC/altera, firmware/intel: Add Stratix10 ECC DBE SMC call
EDAC/altera: Initialize peripheral FIFOs in probe()
EDAC/altera: Do less intrusive error injection
EDAC/amd64: Adjust printed chip select sizes when interleaved
EDAC/amd64: Support more than two controllers for chip select handling
EDAC/amd64: Recognize x16 symbol size
EDAC/amd64: Set maximum channel layer size depending on family
EDAC/amd64: Support more than two Unified Memory Controllers
EDAC/amd64: Use a macro for iterating over Unified Memory Controllers
EDAC/amd64: Add Family 17h Model 30h PCI IDs
MAINTAINERS: Add entry for EDAC-I10NM
MAINTAINERS: Update entry for EDAC-SKYLAKE
EDAC, altera: Fix S10 Double Bit Error Notification
EDAC, skx, i10nm: Make skx_common.c a pure library
Diffstat (limited to 'drivers/edac')
-rw-r--r-- | drivers/edac/altera_edac.c | 262 | ||||
-rw-r--r-- | drivers/edac/altera_edac.h | 69 | ||||
-rw-r--r-- | drivers/edac/amd64_edac.c | 135 | ||||
-rw-r--r-- | drivers/edac/amd64_edac.h | 11 | ||||
-rw-r--r-- | drivers/edac/i10nm_base.c | 52 | ||||
-rw-r--r-- | drivers/edac/skx_base.c | 50 | ||||
-rw-r--r-- | drivers/edac/skx_common.c | 57 | ||||
-rw-r--r-- | drivers/edac/skx_common.h | 8 |
8 files changed, 373 insertions, 271 deletions
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 1bcf9aea0cdf..8816f74a22b4 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -9,6 +9,7 @@ #include <linux/ctype.h> #include <linux/delay.h> #include <linux/edac.h> +#include <linux/firmware/intel/stratix10-smc.h> #include <linux/genalloc.h> #include <linux/interrupt.h> #include <linux/irqchip/chained_irq.h> @@ -1361,8 +1362,19 @@ static const struct edac_device_prv_data a10_l2ecc_data = { #ifdef CONFIG_EDAC_ALTERA_ETHERNET +static int __init socfpga_init_ethernet_ecc(struct altr_edac_device_dev *dev) +{ + int ret; + + ret = altr_init_a10_ecc_device_type("altr,socfpga-eth-mac-ecc"); + if (ret) + return ret; + + return altr_check_ecc_deps(dev); +} + static const struct edac_device_prv_data a10_enetecc_data = { - .setup = altr_check_ecc_deps, + .setup = socfpga_init_ethernet_ecc, .ce_clear_mask = ALTR_A10_ECC_SERRPENA, .ue_clear_mask = ALTR_A10_ECC_DERRPENA, .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, @@ -1374,21 +1386,25 @@ static const struct edac_device_prv_data a10_enetecc_data = { .inject_fops = &altr_edac_a10_device_inject2_fops, }; -static int __init socfpga_init_ethernet_ecc(void) -{ - return altr_init_a10_ecc_device_type("altr,socfpga-eth-mac-ecc"); -} - -early_initcall(socfpga_init_ethernet_ecc); - #endif /* CONFIG_EDAC_ALTERA_ETHERNET */ /********************** NAND Device Functions **********************/ #ifdef CONFIG_EDAC_ALTERA_NAND +static int __init socfpga_init_nand_ecc(struct altr_edac_device_dev *device) +{ + int ret; + + ret = altr_init_a10_ecc_device_type("altr,socfpga-nand-ecc"); + if (ret) + return ret; + + return altr_check_ecc_deps(device); +} + static const struct edac_device_prv_data a10_nandecc_data = { - .setup = altr_check_ecc_deps, + .setup = socfpga_init_nand_ecc, .ce_clear_mask = ALTR_A10_ECC_SERRPENA, .ue_clear_mask = ALTR_A10_ECC_DERRPENA, .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, @@ -1400,21 +1416,25 @@ static const struct edac_device_prv_data a10_nandecc_data = { .inject_fops = &altr_edac_a10_device_inject_fops, }; -static int __init socfpga_init_nand_ecc(void) -{ - return altr_init_a10_ecc_device_type("altr,socfpga-nand-ecc"); -} - -early_initcall(socfpga_init_nand_ecc); - #endif /* CONFIG_EDAC_ALTERA_NAND */ /********************** DMA Device Functions **********************/ #ifdef CONFIG_EDAC_ALTERA_DMA +static int __init socfpga_init_dma_ecc(struct altr_edac_device_dev *device) +{ + int ret; + + ret = altr_init_a10_ecc_device_type("altr,socfpga-dma-ecc"); + if (ret) + return ret; + + return altr_check_ecc_deps(device); +} + static const struct edac_device_prv_data a10_dmaecc_data = { - .setup = altr_check_ecc_deps, + .setup = socfpga_init_dma_ecc, .ce_clear_mask = ALTR_A10_ECC_SERRPENA, .ue_clear_mask = ALTR_A10_ECC_DERRPENA, .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, @@ -1426,21 +1446,25 @@ static const struct edac_device_prv_data a10_dmaecc_data = { .inject_fops = &altr_edac_a10_device_inject_fops, }; -static int __init socfpga_init_dma_ecc(void) -{ - return altr_init_a10_ecc_device_type("altr,socfpga-dma-ecc"); -} - -early_initcall(socfpga_init_dma_ecc); - #endif /* CONFIG_EDAC_ALTERA_DMA */ /********************** USB Device Functions **********************/ #ifdef CONFIG_EDAC_ALTERA_USB +static int __init socfpga_init_usb_ecc(struct altr_edac_device_dev *device) +{ + int ret; + + ret = altr_init_a10_ecc_device_type("altr,socfpga-usb-ecc"); + if (ret) + return ret; + + return altr_check_ecc_deps(device); +} + static const struct edac_device_prv_data a10_usbecc_data = { - .setup = altr_check_ecc_deps, + .setup = socfpga_init_usb_ecc, .ce_clear_mask = ALTR_A10_ECC_SERRPENA, .ue_clear_mask = ALTR_A10_ECC_DERRPENA, .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, @@ -1452,21 +1476,25 @@ static const struct edac_device_prv_data a10_usbecc_data = { .inject_fops = &altr_edac_a10_device_inject2_fops, }; -static int __init socfpga_init_usb_ecc(void) -{ - return altr_init_a10_ecc_device_type("altr,socfpga-usb-ecc"); -} - -early_initcall(socfpga_init_usb_ecc); - #endif /* CONFIG_EDAC_ALTERA_USB */ /********************** QSPI Device Functions **********************/ #ifdef CONFIG_EDAC_ALTERA_QSPI +static int __init socfpga_init_qspi_ecc(struct altr_edac_device_dev *device) +{ + int ret; + + ret = altr_init_a10_ecc_device_type("altr,socfpga-qspi-ecc"); + if (ret) + return ret; + + return altr_check_ecc_deps(device); +} + static const struct edac_device_prv_data a10_qspiecc_data = { - .setup = altr_check_ecc_deps, + .setup = socfpga_init_qspi_ecc, .ce_clear_mask = ALTR_A10_ECC_SERRPENA, .ue_clear_mask = ALTR_A10_ECC_DERRPENA, .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, @@ -1478,13 +1506,6 @@ static const struct edac_device_prv_data a10_qspiecc_data = { .inject_fops = &altr_edac_a10_device_inject_fops, }; -static int __init socfpga_init_qspi_ecc(void) -{ - return altr_init_a10_ecc_device_type("altr,socfpga-qspi-ecc"); -} - -early_initcall(socfpga_init_qspi_ecc); - #endif /* CONFIG_EDAC_ALTERA_QSPI */ /********************* SDMMC Device Functions **********************/ @@ -1593,6 +1614,35 @@ err_release_group_1: return rc; } +static int __init socfpga_init_sdmmc_ecc(struct altr_edac_device_dev *device) +{ + int rc = -ENODEV; + struct device_node *child; + + child = of_find_compatible_node(NULL, NULL, "altr,socfpga-sdmmc-ecc"); + if (!child) + return -ENODEV; + + if (!of_device_is_available(child)) + goto exit; + + if (validate_parent_available(child)) + goto exit; + + /* Init portB */ + rc = altr_init_a10_ecc_block(child, ALTR_A10_SDMMC_IRQ_MASK, + a10_sdmmceccb_data.ecc_enable_mask, 1); + if (rc) + goto exit; + + /* Setup portB */ + return altr_portb_setup(device); + +exit: + of_node_put(child); + return rc; +} + static irqreturn_t altr_edac_a10_ecc_irq_portb(int irq, void *dev_id) { struct altr_edac_device_dev *ad = dev_id; @@ -1617,7 +1667,7 @@ static irqreturn_t altr_edac_a10_ecc_irq_portb(int irq, void *dev_id) } static const struct edac_device_prv_data a10_sdmmcecca_data = { - .setup = altr_portb_setup, + .setup = socfpga_init_sdmmc_ecc, .ce_clear_mask = ALTR_A10_ECC_SERRPENA, .ue_clear_mask = ALTR_A10_ECC_DERRPENA, .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, @@ -1630,7 +1680,7 @@ static const struct edac_device_prv_data a10_sdmmcecca_data = { }; static const struct edac_device_prv_data a10_sdmmceccb_data = { - .setup = altr_portb_setup, + .setup = socfpga_init_sdmmc_ecc, .ce_clear_mask = ALTR_A10_ECC_SERRPENB, .ue_clear_mask = ALTR_A10_ECC_DERRPENB, .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, @@ -1642,35 +1692,6 @@ static const struct edac_device_prv_data a10_sdmmceccb_data = { .inject_fops = &altr_edac_a10_device_inject_fops, }; -static int __init socfpga_init_sdmmc_ecc(void) -{ - int rc = -ENODEV; - struct device_node *child; - - if (!socfpga_is_a10() && !socfpga_is_s10()) - return -ENODEV; - - child = of_find_compatible_node(NULL, NULL, "altr,socfpga-sdmmc-ecc"); - if (!child) { - edac_printk(KERN_WARNING, EDAC_DEVICE, "SDMMC node not found\n"); - return -ENODEV; - } - - if (!of_device_is_available(child)) - goto exit; - - if (validate_parent_available(child)) - goto exit; - - rc = altr_init_a10_ecc_block(child, ALTR_A10_SDMMC_IRQ_MASK, - a10_sdmmcecca_data.ecc_enable_mask, 1); -exit: - of_node_put(child); - return rc; -} - -early_initcall(socfpga_init_sdmmc_ecc); - #endif /* CONFIG_EDAC_ALTERA_SDMMC */ /********************* Arria10 EDAC Device Functions *************************/ @@ -1762,28 +1783,24 @@ static ssize_t altr_edac_a10_device_trig2(struct file *file, if (trig_type == ALTR_UE_TRIGGER_CHAR) { writel(priv->ue_set_mask, set_addr); } else { - /* Setup write of 0 to first 4 bytes */ - writel(0x0, drvdata->base + ECC_BLK_WDATA0_OFST); - writel(0x0, drvdata->base + ECC_BLK_WDATA1_OFST); - writel(0x0, drvdata->base + ECC_BLK_WDATA2_OFST); - writel(0x0, drvdata->base + ECC_BLK_WDATA3_OFST); - /* Setup write of 4 bytes */ + /* Setup read/write of 4 bytes */ writel(ECC_WORD_WRITE, drvdata->base + ECC_BLK_DBYTECTRL_OFST); /* Setup Address to 0 */ - writel(0x0, drvdata->base + ECC_BLK_ADDRESS_OFST); - /* Setup accctrl to write & data override */ - writel(ECC_WRITE_DOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST); - /* Kick it. */ - writel(ECC_XACT_KICK, drvdata->base + ECC_BLK_STARTACC_OFST); - /* Setup accctrl to read & ecc override */ - writel(ECC_READ_EOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST); + writel(0, drvdata->base + ECC_BLK_ADDRESS_OFST); + /* Setup accctrl to read & ecc & data override */ + writel(ECC_READ_EDOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST); /* Kick it. */ writel(ECC_XACT_KICK, drvdata->base + ECC_BLK_STARTACC_OFST); /* Setup write for single bit change */ - writel(0x1, drvdata->base + ECC_BLK_WDATA0_OFST); - writel(0x0, drvdata->base + ECC_BLK_WDATA1_OFST); - writel(0x0, drvdata->base + ECC_BLK_WDATA2_OFST); - writel(0x0, drvdata->base + ECC_BLK_WDATA3_OFST); + writel(readl(drvdata->base + ECC_BLK_RDATA0_OFST) ^ 0x1, + drvdata->base + ECC_BLK_WDATA0_OFST); + writel(readl(drvdata->base + ECC_BLK_RDATA1_OFST), + drvdata->base + ECC_BLK_WDATA1_OFST); + writel(readl(drvdata->base + ECC_BLK_RDATA2_OFST), + drvdata->base + ECC_BLK_WDATA2_OFST); + writel(readl(drvdata->base + ECC_BLK_RDATA3_OFST), + drvdata->base + ECC_BLK_WDATA3_OFST); + /* Copy Read ECC to Write ECC */ writel(readl(drvdata->base + ECC_BLK_RECC0_OFST), drvdata->base + ECC_BLK_WECC0_OFST); @@ -1930,6 +1947,15 @@ static int altr_edac_a10_device_add(struct altr_arria10_edac *edac, goto err_release_group1; } +#ifdef CONFIG_ARCH_STRATIX10 + /* Use IRQ to determine SError origin instead of assigning IRQ */ + rc = of_property_read_u32_index(np, "interrupts", 0, &altdev->db_irq); + if (rc) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "Unable to parse DB IRQ index\n"); + goto err_release_group1; + } +#else altdev->db_irq = irq_of_parse_and_map(np, 1); if (!altdev->db_irq) { edac_printk(KERN_ERR, EDAC_DEVICE, "Error allocating DBIRQ\n"); @@ -1943,6 +1969,7 @@ static int altr_edac_a10_device_add(struct altr_arria10_edac *edac, edac_printk(KERN_ERR, EDAC_DEVICE, "No DBERR IRQ resource\n"); goto err_release_group1; } +#endif rc = edac_device_add_device(dci); if (rc) { @@ -2005,6 +2032,10 @@ static const struct irq_domain_ops a10_eccmgr_ic_ops = { /************** Stratix 10 EDAC Double Bit Error Handler ************/ #define to_a10edac(p, m) container_of(p, struct altr_arria10_edac, m) +#ifdef CONFIG_ARCH_STRATIX10 +/* panic routine issues reboot on non-zero panic_timeout */ +extern int panic_timeout; + /* * The double bit error is handled through SError which is fatal. This is * called as a panic notifier to printout ECC error info as part of the panic. @@ -2018,17 +2049,37 @@ static int s10_edac_dberr_handler(struct notifier_block *this, regmap_read(edac->ecc_mgr_map, S10_SYSMGR_ECC_INTSTAT_DERR_OFST, &dberror); regmap_write(edac->ecc_mgr_map, S10_SYSMGR_UE_VAL_OFST, dberror); - if (dberror & S10_DDR0_IRQ_MASK) { - regmap_read(edac->ecc_mgr_map, A10_DERRADDR_OFST, &err_addr); - regmap_write(edac->ecc_mgr_map, S10_SYSMGR_UE_ADDR_OFST, - err_addr); - edac_printk(KERN_ERR, EDAC_MC, - "EDAC: [Uncorrectable errors @ 0x%08X]\n\n", - err_addr); + if (dberror & S10_DBE_IRQ_MASK) { + struct list_head *position; + struct altr_edac_device_dev *ed; + struct arm_smccc_res result; + + /* Find the matching DBE in the list of devices */ + list_for_each(position, &edac->a10_ecc_devices) { + ed = list_entry(position, struct altr_edac_device_dev, + next); + if (!(BIT(ed->db_irq) & dberror)) + continue; + + writel(ALTR_A10_ECC_DERRPENA, + ed->base + ALTR_A10_ECC_INTSTAT_OFST); + err_addr = readl(ed->base + ALTR_S10_DERR_ADDRA_OFST); + regmap_write(edac->ecc_mgr_map, + S10_SYSMGR_UE_ADDR_OFST, err_addr); + edac_printk(KERN_ERR, EDAC_DEVICE, + "EDAC: [Fatal DBE on %s @ 0x%08X]\n", + ed->edac_dev_name, err_addr); + break; + } + /* Notify the System through SMC. Reboot delay = 1 second */ + panic_timeout = 1; + arm_smccc_smc(INTEL_SIP_SMC_ECC_DBE, dberror, 0, 0, 0, 0, + 0, 0, &result); } return NOTIFY_DONE; } +#endif /****************** Arria 10 EDAC Probe Function *********************/ static int altr_edac_a10_probe(struct platform_device *pdev) @@ -2098,16 +2149,8 @@ static int altr_edac_a10_probe(struct platform_device *pdev) altr_edac_a10_irq_handler, edac); - if (socfpga_is_a10()) { - edac->db_irq = platform_get_irq(pdev, 1); - if (edac->db_irq < 0) { - dev_err(&pdev->dev, "No DBERR IRQ resource\n"); - return edac->db_irq; - } - irq_set_chained_handler_and_data(edac->db_irq, - altr_edac_a10_irq_handler, - edac); - } else { +#ifdef CONFIG_ARCH_STRATIX10 + { int dberror, err_addr; edac->panic_notifier.notifier_call = s10_edac_dberr_handler; @@ -2130,6 +2173,15 @@ static int altr_edac_a10_probe(struct platform_device *pdev) S10_SYSMGR_UE_ADDR_OFST, 0); } } +#else + edac->db_irq = platform_get_irq(pdev, 1); + if (edac->db_irq < 0) { + dev_err(&pdev->dev, "No DBERR IRQ resource\n"); + return edac->db_irq; + } + irq_set_chained_handler_and_data(edac->db_irq, + altr_edac_a10_irq_handler, edac); +#endif for_each_child_of_node(pdev->dev.of_node, child) { if (!of_device_is_available(child)) diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index f8664bac9fa8..55654cc4bcdf 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -289,6 +289,7 @@ struct altr_sdram_mc_data { #define ALTR_A10_ECC_INIT_WATCHDOG_10US 10000 /************* Stratix10 Defines **************/ +#define ALTR_S10_DERR_ADDRA_OFST 0x2C /* Stratix10 ECC Manager Defines */ #define S10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98 @@ -299,6 +300,7 @@ struct altr_sdram_mc_data { #define S10_SYSMGR_UE_ADDR_OFST 0x224 #define S10_DDR0_IRQ_MASK BIT(16) +#define S10_DBE_IRQ_MASK 0x3FE /* Define ECC Block Offsets for peripherals */ #define ECC_BLK_ADDRESS_OFST 0x40 @@ -319,7 +321,7 @@ struct altr_sdram_mc_data { #define ECC_BLK_STARTACC_OFST 0x7C #define ECC_XACT_KICK 0x10000 -#define ECC_WORD_WRITE 0xF +#define ECC_WORD_WRITE 0xFF #define ECC_WRITE_DOVR 0x101 #define ECC_WRITE_EDOVR 0x103 #define ECC_READ_EOVR 0x2 @@ -370,69 +372,4 @@ struct altr_arria10_edac { struct notifier_block panic_notifier; }; -/* - * Functions specified by ARM SMC Calling convention: - * - * FAST call executes atomic operations, returns when the requested operation - * has completed. - * STD call starts a operation which can be preempted by a non-secure - * interrupt. The call can return before the requested operation has - * completed. - * - * a0..a7 is used as register names in the descriptions below, on arm32 - * that translates to r0..r7 and on arm64 to w0..w7. - */ - -#define INTEL_SIP_SMC_STD_CALL_VAL(func_num) \ - ARM_SMCCC_CALL_VAL(ARM_SMCCC_STD_CALL, ARM_SMCCC_SMC_64, \ - ARM_SMCCC_OWNER_SIP, (func_num)) - -#define INTEL_SIP_SMC_FAST_CALL_VAL(func_num) \ - ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_64, \ - ARM_SMCCC_OWNER_SIP, (func_num)) - -#define INTEL_SIP_SMC_RETURN_UNKNOWN_FUNCTION 0xFFFFFFFF -#define INTEL_SIP_SMC_STATUS_OK 0x0 -#define INTEL_SIP_SMC_REG_ERROR 0x5 - -/* - * Request INTEL_SIP_SMC_REG_READ - * - * Read a protected register using SMCCC - * - * Call register usage: - * a0: INTEL_SIP_SMC_REG_READ. - * a1: register address. - * a2-7: not used. - * - * Return status: - * a0: INTEL_SIP_SMC_STATUS_OK, INTEL_SIP_SMC_REG_ERROR, or - * INTEL_SIP_SMC_RETURN_UNKNOWN_FUNCTION - * a1: Value in the register - * a2-3: not used. - */ -#define INTEL_SIP_SMC_FUNCID_REG_READ 7 -#define INTEL_SIP_SMC_REG_READ \ - INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_REG_READ) - -/* - * Request INTEL_SIP_SMC_REG_WRITE - * - * Write a protected register using SMCCC - * - * Call register usage: - * a0: INTEL_SIP_SMC_REG_WRITE. - * a1: register address - * a2: value to program into register. - * a3-7: not used. - * - * Return status: - * a0: INTEL_SIP_SMC_STATUS_OK, INTEL_SIP_SMC_REG_ERROR, or - * INTEL_SIP_SMC_RETURN_UNKNOWN_FUNCTION - * a1-3: not used. - */ -#define INTEL_SIP_SMC_FUNCID_REG_WRITE 8 -#define INTEL_SIP_SMC_REG_WRITE \ - INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_REG_WRITE) - #endif /* #ifndef _ALTERA_EDAC_H */ diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 6ea98575a402..e2a99466faaa 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -18,6 +18,9 @@ static struct msr __percpu *msrs; /* Per-node stuff */ static struct ecc_settings **ecc_stngs; +/* Number of Unified Memory Controllers */ +static u8 num_umcs; + /* * Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing * bandwidth to a valid bit pattern. The 'set' operation finds the 'matching- @@ -449,6 +452,9 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct, #define for_each_chip_select_mask(i, dct, pvt) \ for (i = 0; i < pvt->csels[dct].m_cnt; i++) +#define for_each_umc(i) \ + for (i = 0; i < num_umcs; i++) + /* * @input_addr is an InputAddr associated with the node given by mci. Return the * csrow that input_addr maps to, or -1 on failure (no csrow claims input_addr). @@ -722,7 +728,7 @@ static unsigned long determine_edac_cap(struct amd64_pvt *pvt) if (pvt->umc) { u8 i, umc_en_mask = 0, dimm_ecc_en_mask = 0; - for (i = 0; i < NUM_UMCS; i++) { + for_each_umc(i) { if (!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT)) continue; @@ -781,6 +787,22 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan) (dclr & BIT(15)) ? "yes" : "no"); } +/* + * The Address Mask should be a contiguous set of bits in the non-interleaved + * case. So to check for CS interleaving, find the most- and least-significant + * bits of the mask, generate a contiguous bitmask, and compare the two. + */ +static bool f17_cs_interleaved(struct amd64_pvt *pvt, u8 ctrl, int cs) +{ + u32 mask = pvt->csels[ctrl].csmasks[cs >> 1]; + u32 msb = fls(mask) - 1, lsb = ffs(mask) - 1; + u32 test_mask = GENMASK(msb, lsb); + + edac_dbg(1, "mask=0x%08x test_mask=0x%08x\n", mask, test_mask); + + return mask ^ test_mask; +} + static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl) { int dimm, size0, size1, cs0, cs1; @@ -797,8 +819,19 @@ static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl) size1 = 0; cs1 = dimm * 2 + 1; - if (csrow_enabled(cs1, ctrl, pvt)) - size1 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs1); + if (csrow_enabled(cs1, ctrl, pvt)) { + /* + * CS interleaving is only supported if both CSes have + * the same amount of memory. Because they are + * interleaved, it will look like both CSes have the + * full amount of memory. Save the size for both as + * half the amount we found on CS0, if interleaved. + */ + if (f17_cs_interleaved(pvt, ctrl, cs1)) + size1 = size0 = (size0 >> 1); + else + size1 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs1); + } amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n", cs0, size0, @@ -811,7 +844,7 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt) struct amd64_umc *umc; u32 i, tmp, umc_base; - for (i = 0; i < NUM_UMCS; i++) { + for_each_umc(i) { umc_base = get_umc_base(i); umc = &pvt->umc[i]; @@ -894,8 +927,7 @@ static void dump_misc_regs(struct amd64_pvt *pvt) edac_dbg(1, " DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no"); - amd64_info("using %s syndromes.\n", - ((pvt->ecc_sym_sz == 8) ? "x8" : "x4")); + amd64_info("using x%u syndromes.\n", pvt->ecc_sym_sz); } /* @@ -1388,7 +1420,7 @@ static int f17_early_channel_count(struct amd64_pvt *pvt) int i, channels = 0; /* SDP Control bit 31 (SdpInit) is clear for unused UMC channels */ - for (i = 0; i < NUM_UMCS; i++) + for_each_umc(i) channels += !!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT); amd64_info("MCT channel count: %d\n", channels); @@ -2211,6 +2243,15 @@ static struct amd64_family_type family_types[] = { .dbam_to_cs = f17_base_addr_to_cs_size, } }, + [F17_M30H_CPUS] = { + .ctl_name = "F17h_M30h", + .f0_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F0, + .f6_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F6, + .ops = { + .early_channel_count = f17_early_channel_count, + .dbam_to_cs = f17_base_addr_to_cs_size, + } + }, }; /* @@ -2464,18 +2505,14 @@ static inline void decode_bus_error(int node_id, struct mce *m) * To find the UMC channel represented by this bank we need to match on its * instance_id. The instance_id of a bank is held in the lower 32 bits of its * IPID. + * + * Currently, we can derive the channel number by looking at the 6th nibble in + * the instance_id. For example, instance_id=0xYXXXXX where Y is the channel + * number. */ -static int find_umc_channel(struct amd64_pvt *pvt, struct mce *m) +static int find_umc_channel(struct mce *m) { - u32 umc_instance_id[] = {0x50f00, 0x150f00}; - u32 instance_id = m->ipid & GENMASK(31, 0); - int i, channel = -1; - - for (i = 0; i < ARRAY_SIZE(umc_instance_id); i++) - if (umc_instance_id[i] == instance_id) - channel = i; - - return channel; + return (m->ipid & GENMASK(31, 0)) >> 20; } static void decode_umc_error(int node_id, struct mce *m) @@ -2497,11 +2534,7 @@ static void decode_umc_error(int node_id, struct mce *m) if (m->status & MCI_STATUS_DEFERRED) ecc_type = 3; - err.channel = find_umc_channel(pvt, m); - if (err.channel < 0) { - err.err_code = ERR_CHANNEL; - goto log_error; - } + err.channel = find_umc_channel(m); if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) { err.err_code = ERR_NORM_ADDR; @@ -2603,19 +2636,19 @@ static void determine_ecc_sym_sz(struct amd64_pvt *pvt) if (pvt->umc) { u8 i; - for (i = 0; i < NUM_UMCS; i++) { + for_each_umc(i) { /* Check enabled channels only: */ - if ((pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) && - (pvt->umc[i].ecc_ctrl & BIT(7))) { - pvt->ecc_sym_sz = 8; - break; + if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) { + if (pvt->umc[i].ecc_ctrl & BIT(9)) { + pvt->ecc_sym_sz = 16; + return; + } else if (pvt->umc[i].ecc_ctrl & BIT(7)) { + pvt->ecc_sym_sz = 8; + return; + } } } - - return; - } - - if (pvt->fam >= 0x10) { + } else if (pvt->fam >= 0x10) { u32 tmp; amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp); @@ -2639,7 +2672,7 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt) u32 i, umc_base; /* Read registers from each UMC */ - for (i = 0; i < NUM_UMCS; i++) { + for_each_umc(i) { umc_base = get_umc_base(i); umc = &pvt->umc[i]; @@ -3052,7 +3085,7 @@ static bool ecc_enabled(struct pci_dev *F3, u16 nid) if (boot_cpu_data.x86 >= 0x17) { u8 umc_en_mask = 0, ecc_en_mask = 0; - for (i = 0; i < NUM_UMCS; i++) { + for_each_umc(i) { u32 base = get_umc_base(i); /* Only check enabled UMCs. */ @@ -3105,7 +3138,7 @@ f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt) { u8 i, ecc_en = 1, cpk_en = 1; - for (i = 0; i < NUM_UMCS; i++) { + for_each_umc(i) { if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) { ecc_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_ENABLED); cpk_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_CHIPKILL_CAP); @@ -3203,6 +3236,10 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) fam_type = &family_types[F17_M10H_CPUS]; pvt->ops = &family_types[F17_M10H_CPUS].ops; break; + } else if (pvt->model >= 0x30 && pvt->model <= 0x3f) { + fam_type = &family_types[F17_M30H_CPUS]; + pvt->ops = &family_types[F17_M30H_CPUS].ops; + break; } /* fall through */ case 0x18: @@ -3236,6 +3273,22 @@ static const struct attribute_group *amd64_edac_attr_groups[] = { NULL }; +/* Set the number of Unified Memory Controllers in the system. */ +static void compute_num_umcs(void) +{ + u8 model = boot_cpu_data.x86_model; + + if (boot_cpu_data.x86 < 0x17) + return; + + if (model >= 0x30 && model <= 0x3f) + num_umcs = 8; + else + num_umcs = 2; + + edac_dbg(1, "Number of UMCs: %x", num_umcs); +} + static int init_one_instance(unsigned int nid) { struct pci_dev *F3 = node_to_amd_nb(nid)->misc; @@ -3260,7 +3313,7 @@ static int init_one_instance(unsigned int nid) goto err_free; if (pvt->fam >= 0x17) { - pvt->umc = kcalloc(NUM_UMCS, sizeof(struct amd64_umc), GFP_KERNEL); + pvt->umc = kcalloc(num_umcs, sizeof(struct amd64_umc), GFP_KERNEL); if (!pvt->umc) { ret = -ENOMEM; goto err_free; @@ -3299,8 +3352,14 @@ static int init_one_instance(unsigned int nid) * Always allocate two channels since we can have setups with DIMMs on * only one channel. Also, this simplifies handling later for the price * of a couple of KBs tops. + * + * On Fam17h+, the number of controllers may be greater than two. So set + * the size equal to the maximum number of UMCs. */ - layers[1].size = 2; + if (pvt->fam >= 0x17) + layers[1].size = num_umcs; + else + layers[1].size = 2; layers[1].is_virt_csrow = false; mci = edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0); @@ -3481,6 +3540,8 @@ static int __init amd64_edac_init(void) if (!msrs) goto err_free; + compute_num_umcs(); + for (i = 0; i < amd_nb_num(); i++) { err = probe_one_instance(i); if (err) { diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 4242f8e39c18..8f66472f7adc 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -117,6 +117,8 @@ #define PCI_DEVICE_ID_AMD_17H_DF_F6 0x1466 #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F0 0x15e8 #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F6 0x15ee +#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F0 0x1490 +#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F6 0x1496 /* * Function 1 - Address Map @@ -272,8 +274,6 @@ #define UMC_SDP_INIT BIT(31) -#define NUM_UMCS 2 - enum amd_families { K8_CPUS = 0, F10_CPUS, @@ -284,6 +284,7 @@ enum amd_families { F16_M30H_CPUS, F17_CPUS, F17_M10H_CPUS, + F17_M30H_CPUS, NUM_FAMILIES, }; @@ -363,7 +364,7 @@ struct amd64_pvt { u32 dct_sel_hi; /* DRAM Controller Select High */ u32 online_spare; /* On-Line spare Reg */ - /* x4 or x8 syndromes in use */ + /* x4, x8, or x16 syndromes in use */ u8 ecc_sym_sz; /* place to store error injection parameters prior to issue */ @@ -396,8 +397,8 @@ struct err_info { static inline u32 get_umc_base(u8 channel) { - /* ch0: 0x50000, ch1: 0x150000 */ - return 0x50000 + (!!channel << 20); + /* chY: 0xY50000 */ + return 0x50000 + (channel << 20); } static inline u64 get_dram_base(struct amd64_pvt *pvt, u8 i) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index c334fb7c63df..6f06aec4877c 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -181,6 +181,54 @@ static struct notifier_block i10nm_mce_dec = { .priority = MCE_PRIO_EDAC, }; +#ifdef CONFIG_EDAC_DEBUG +/* + * Debug feature. + * Exercise the address decode logic by writing an address to + * /sys/kernel/debug/edac/i10nm_test/addr. + */ +static struct dentry *i10nm_test; + +static int debugfs_u64_set(void *data, u64 val) +{ + struct mce m; + + pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val); + + memset(&m, 0, sizeof(m)); + /* ADDRV + MemRd + Unknown channel */ + m.status = MCI_STATUS_ADDRV + 0x90; + /* One corrected error */ + m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT); + m.addr = val; + skx_mce_check_error(NULL, 0, &m); + + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); + +static void setup_i10nm_debug(void) +{ + i10nm_test = edac_debugfs_create_dir("i10nm_test"); + if (!i10nm_test) + return; + + if (!edac_debugfs_create_file("addr", 0200, i10nm_test, + NULL, &fops_u64_wo)) { + debugfs_remove(i10nm_test); + i10nm_test = NULL; + } +} + +static void teardown_i10nm_debug(void) +{ + debugfs_remove_recursive(i10nm_test); +} +#else +static inline void setup_i10nm_debug(void) {} +static inline void teardown_i10nm_debug(void) {} +#endif /*CONFIG_EDAC_DEBUG*/ + static int __init i10nm_init(void) { u8 mc = 0, src_id = 0, node_id = 0; @@ -249,7 +297,7 @@ static int __init i10nm_init(void) opstate_init(); mce_register_decode_chain(&i10nm_mce_dec); - setup_skx_debug("i10nm_test"); + setup_i10nm_debug(); i10nm_printk(KERN_INFO, "%s\n", I10NM_REVISION); @@ -262,7 +310,7 @@ fail: static void __exit i10nm_exit(void) { edac_dbg(2, "\n"); - teardown_skx_debug(); + teardown_i10nm_debug(); mce_unregister_decode_chain(&i10nm_mce_dec); skx_adxl_put(); skx_remove(); diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c index adae4c848ca1..a5c8fa3a249a 100644 --- a/drivers/edac/skx_base.c +++ b/drivers/edac/skx_base.c @@ -540,6 +540,54 @@ static struct notifier_block skx_mce_dec = { .priority = MCE_PRIO_EDAC, }; +#ifdef CONFIG_EDAC_DEBUG +/* + * Debug feature. + * Exercise the address decode logic by writing an address to + * /sys/kernel/debug/edac/skx_test/addr. + */ +static struct dentry *skx_test; + +static int debugfs_u64_set(void *data, u64 val) +{ + struct mce m; + + pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val); + + memset(&m, 0, sizeof(m)); + /* ADDRV + MemRd + Unknown channel */ + m.status = MCI_STATUS_ADDRV + 0x90; + /* One corrected error */ + m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT); + m.addr = val; + skx_mce_check_error(NULL, 0, &m); + + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); + +static void setup_skx_debug(void) +{ + skx_test = edac_debugfs_create_dir("skx_test"); + if (!skx_test) + return; + + if (!edac_debugfs_create_file("addr", 0200, skx_test, + NULL, &fops_u64_wo)) { + debugfs_remove(skx_test); + skx_test = NULL; + } +} + +static void teardown_skx_debug(void) +{ + debugfs_remove_recursive(skx_test); +} +#else +static inline void setup_skx_debug(void) {} +static inline void teardown_skx_debug(void) {} +#endif /*CONFIG_EDAC_DEBUG*/ + /* * skx_init: * make sure we are running on the correct cpu model @@ -619,7 +667,7 @@ static int __init skx_init(void) /* Ensure that the OPSTATE is set correctly for POLL or NMI */ opstate_init(); - setup_skx_debug("skx_test"); + setup_skx_debug(); mce_register_decode_chain(&skx_mce_dec); diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 0e96e7b5b0a7..b0dddcfa9baa 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -1,7 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Common codes for both the skx_edac driver and Intel 10nm server EDAC driver. - * Originally split out from the skx_edac driver. + * + * Shared code by both skx_edac and i10nm_edac. Originally split out + * from the skx_edac driver. + * + * This file is linked into both skx_edac and i10nm_edac drivers. In + * order to avoid link errors, this file must be like a pure library + * without including symbols and defines which would otherwise conflict, + * when linked once into a module and into a built-in object, at the + * same time. For example, __this_module symbol references when that + * file is being linked into a built-in object. * * Copyright (c) 2018, Intel Corporation. */ @@ -644,48 +652,3 @@ void skx_remove(void) kfree(d); } } - -#ifdef CONFIG_EDAC_DEBUG -/* - * Debug feature. - * Exercise the address decode logic by writing an address to - * /sys/kernel/debug/edac/dirname/addr. - */ -static struct dentry *skx_test; - -static int debugfs_u64_set(void *data, u64 val) -{ - struct mce m; - - pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val); - - memset(&m, 0, sizeof(m)); - /* ADDRV + MemRd + Unknown channel */ - m.status = MCI_STATUS_ADDRV + 0x90; - /* One corrected error */ - m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT); - m.addr = val; - skx_mce_check_error(NULL, 0, &m); - - return 0; -} -DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); - -void setup_skx_debug(const char *dirname) -{ - skx_test = edac_debugfs_create_dir(dirname); - if (!skx_test) - return; - - if (!edac_debugfs_create_file("addr", 0200, skx_test, - NULL, &fops_u64_wo)) { - debugfs_remove(skx_test); - skx_test = NULL; - } -} - -void teardown_skx_debug(void) -{ - debugfs_remove_recursive(skx_test); -} -#endif /*CONFIG_EDAC_DEBUG*/ diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index d25374e34d4f..d18fa98669af 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -141,12 +141,4 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, void skx_remove(void); -#ifdef CONFIG_EDAC_DEBUG -void setup_skx_debug(const char *dirname); -void teardown_skx_debug(void); -#else -static inline void setup_skx_debug(const char *dirname) {} -static inline void teardown_skx_debug(void) {} -#endif /*CONFIG_EDAC_DEBUG*/ - #endif /* _SKX_COMM_EDAC_H */ |