summaryrefslogtreecommitdiffstats
path: root/drivers/edac/sb_edac.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/edac/sb_edac.c')
-rw-r--r--drivers/edac/sb_edac.c204
1 files changed, 154 insertions, 50 deletions
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 07726fb00321..9353c3fc7c05 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -326,6 +326,7 @@ struct sbridge_info {
const struct interleave_pkg *interleave_pkg;
u8 max_sad;
u8 (*get_node_id)(struct sbridge_pvt *pvt);
+ u8 (*get_ha)(u8 bank);
enum mem_type (*get_memory_type)(struct sbridge_pvt *pvt);
enum dev_type (*get_width)(struct sbridge_pvt *pvt, u32 mtr);
struct pci_dev *pci_vtd;
@@ -1002,6 +1003,39 @@ static u8 knl_get_node_id(struct sbridge_pvt *pvt)
return GET_BITFIELD(reg, 0, 2);
}
+/*
+ * Use the reporting bank number to determine which memory
+ * controller (also known as "ha" for "home agent"). Sandy
+ * Bridge only has one memory controller per socket, so the
+ * answer is always zero.
+ */
+static u8 sbridge_get_ha(u8 bank)
+{
+ return 0;
+}
+
+/*
+ * On Ivy Bridge, Haswell and Broadwell the error may be in a
+ * home agent bank (7, 8), or one of the per-channel memory
+ * controller banks (9 .. 16).
+ */
+static u8 ibridge_get_ha(u8 bank)
+{
+ switch (bank) {
+ case 7 ... 8:
+ return bank - 7;
+ case 9 ... 16:
+ return (bank - 9) / 4;
+ default:
+ return 0xff;
+ }
+}
+
+/* Not used, but included for safety/symmetry */
+static u8 knl_get_ha(u8 bank)
+{
+ return 0xff;
+}
static u64 haswell_get_tolm(struct sbridge_pvt *pvt)
{
@@ -1622,7 +1656,7 @@ static int __populate_dimms(struct mem_ctl_info *mci,
size = ((u64)rows * cols * banks * ranks) >> (20 - 3);
npages = MiB_TO_PAGES(size);
- edac_dbg(0, "mc#%d: ha %d channel %d, dimm %d, %lld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
+ edac_dbg(0, "mc#%d: ha %d channel %d, dimm %d, %lld MiB (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
pvt->sbridge_dev->mc, pvt->sbridge_dev->dom, i, j,
size, npages,
banks, ranks, rows, cols);
@@ -2207,6 +2241,60 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
return 0;
}
+static int get_memory_error_data_from_mce(struct mem_ctl_info *mci,
+ const struct mce *m, u8 *socket,
+ u8 *ha, long *channel_mask,
+ char *msg)
+{
+ u32 reg, channel = GET_BITFIELD(m->status, 0, 3);
+ struct mem_ctl_info *new_mci;
+ struct sbridge_pvt *pvt;
+ struct pci_dev *pci_ha;
+ bool tad0;
+
+ if (channel >= NUM_CHANNELS) {
+ sprintf(msg, "Invalid channel 0x%x", channel);
+ return -EINVAL;
+ }
+
+ pvt = mci->pvt_info;
+ if (!pvt->info.get_ha) {
+ sprintf(msg, "No get_ha()");
+ return -EINVAL;
+ }
+ *ha = pvt->info.get_ha(m->bank);
+ if (*ha != 0 && *ha != 1) {
+ sprintf(msg, "Impossible bank %d", m->bank);
+ return -EINVAL;
+ }
+
+ *socket = m->socketid;
+ new_mci = get_mci_for_node_id(*socket, *ha);
+ if (!new_mci) {
+ strcpy(msg, "mci socket got corrupted!");
+ return -EINVAL;
+ }
+
+ pvt = new_mci->pvt_info;
+ pci_ha = pvt->pci_ha;
+ pci_read_config_dword(pci_ha, tad_dram_rule[0], &reg);
+ tad0 = m->addr <= TAD_LIMIT(reg);
+
+ *channel_mask = 1 << channel;
+ if (pvt->mirror_mode == FULL_MIRRORING ||
+ (pvt->mirror_mode == ADDR_RANGE_MIRRORING && tad0)) {
+ *channel_mask |= 1 << ((channel + 2) % 4);
+ pvt->is_cur_addr_mirrored = true;
+ } else {
+ pvt->is_cur_addr_mirrored = false;
+ }
+
+ if (pvt->is_lockstep)
+ *channel_mask |= 1 << ((channel + 1) % 4);
+
+ return 0;
+}
+
/****************************************************************************
Device initialization routines: put/get, init/exit
****************************************************************************/
@@ -2877,10 +2965,16 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
u32 errcode = GET_BITFIELD(m->status, 0, 15);
u32 channel = GET_BITFIELD(m->status, 0, 3);
u32 optypenum = GET_BITFIELD(m->status, 4, 6);
+ /*
+ * Bits 5-0 of MCi_MISC give the least significant bit that is valid.
+ * A value 6 is for cache line aligned address, a value 12 is for page
+ * aligned address reported by patrol scrubber.
+ */
+ u32 lsb = GET_BITFIELD(m->misc, 0, 5);
long channel_mask, first_channel;
- u8 rank, socket, ha;
+ u8 rank = 0xff, socket, ha;
int rc, dimm;
- char *area_type = NULL;
+ char *area_type = "DRAM";
if (pvt->info.type != SANDY_BRIDGE)
recoverable = true;
@@ -2888,6 +2982,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
recoverable = GET_BITFIELD(m->status, 56, 56);
if (uncorrected_error) {
+ core_err_cnt = 1;
if (ripv) {
type = "FATAL";
tp_event = HW_EVENT_ERR_FATAL;
@@ -2911,35 +3006,27 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
* cccc = channel
* If the mask doesn't match, report an error to the parsing logic
*/
- if (! ((errcode & 0xef80) == 0x80)) {
- optype = "Can't parse: it is not a mem";
- } else {
- switch (optypenum) {
- case 0:
- optype = "generic undef request error";
- break;
- case 1:
- optype = "memory read error";
- break;
- case 2:
- optype = "memory write error";
- break;
- case 3:
- optype = "addr/cmd error";
- break;
- case 4:
- optype = "memory scrubbing error";
- break;
- default:
- optype = "reserved";
- break;
- }
+ switch (optypenum) {
+ case 0:
+ optype = "generic undef request error";
+ break;
+ case 1:
+ optype = "memory read error";
+ break;
+ case 2:
+ optype = "memory write error";
+ break;
+ case 3:
+ optype = "addr/cmd error";
+ break;
+ case 4:
+ optype = "memory scrubbing error";
+ break;
+ default:
+ optype = "reserved";
+ break;
}
- /* Only decode errors with an valid address (ADDRV) */
- if (!GET_BITFIELD(m->status, 58, 58))
- return;
-
if (pvt->info.type == KNIGHTS_LANDING) {
if (channel == 14) {
edac_dbg(0, "%s%s err_code:%04x:%04x EDRAM bank %d\n",
@@ -2972,9 +3059,13 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
optype, msg);
}
return;
- } else {
+ } else if (lsb < 12) {
rc = get_memory_error_data(mci, m->addr, &socket, &ha,
- &channel_mask, &rank, &area_type, msg);
+ &channel_mask, &rank,
+ &area_type, msg);
+ } else {
+ rc = get_memory_error_data_from_mce(mci, m, &socket, &ha,
+ &channel_mask, msg);
}
if (rc < 0)
@@ -2989,14 +3080,15 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
first_channel = find_first_bit(&channel_mask, NUM_CHANNELS);
- if (rank < 4)
+ if (rank == 0xff)
+ dimm = -1;
+ else if (rank < 4)
dimm = 0;
else if (rank < 8)
dimm = 1;
else
dimm = 2;
-
/*
* FIXME: On some memory configurations (mirror, lockstep), the
* Memory Controller can't point the error to a single DIMM. The
@@ -3045,17 +3137,11 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
{
struct mce *mce = (struct mce *)data;
struct mem_ctl_info *mci;
- struct sbridge_pvt *pvt;
char *type;
if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
return NOTIFY_DONE;
- mci = get_mci_for_node_id(mce->socketid, IMC0);
- if (!mci)
- return NOTIFY_DONE;
- pvt = mci->pvt_info;
-
/*
* Just let mcelog handle it if the error is
* outside the memory controller. A memory error
@@ -3065,6 +3151,22 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
if ((mce->status & 0xefff) >> 7 != 1)
return NOTIFY_DONE;
+ /* Check ADDRV bit in STATUS */
+ if (!GET_BITFIELD(mce->status, 58, 58))
+ return NOTIFY_DONE;
+
+ /* Check MISCV bit in STATUS */
+ if (!GET_BITFIELD(mce->status, 59, 59))
+ return NOTIFY_DONE;
+
+ /* Check address type in MISC (physical address only) */
+ if (GET_BITFIELD(mce->misc, 6, 8) != 2)
+ return NOTIFY_DONE;
+
+ mci = get_mci_for_node_id(mce->socketid, IMC0);
+ if (!mci)
+ return NOTIFY_DONE;
+
if (mce->mcgstatus & MCG_STATUS_MCIP)
type = "Exception";
else
@@ -3173,6 +3275,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
pvt->info.dram_rule = ibridge_dram_rule;
pvt->info.get_memory_type = get_memory_type;
pvt->info.get_node_id = get_node_id;
+ pvt->info.get_ha = ibridge_get_ha;
pvt->info.rir_limit = rir_limit;
pvt->info.sad_limit = sad_limit;
pvt->info.interleave_mode = interleave_mode;
@@ -3197,6 +3300,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
pvt->info.dram_rule = sbridge_dram_rule;
pvt->info.get_memory_type = get_memory_type;
pvt->info.get_node_id = get_node_id;
+ pvt->info.get_ha = sbridge_get_ha;
pvt->info.rir_limit = rir_limit;
pvt->info.sad_limit = sad_limit;
pvt->info.interleave_mode = interleave_mode;
@@ -3221,6 +3325,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
pvt->info.dram_rule = ibridge_dram_rule;
pvt->info.get_memory_type = haswell_get_memory_type;
pvt->info.get_node_id = haswell_get_node_id;
+ pvt->info.get_ha = ibridge_get_ha;
pvt->info.rir_limit = haswell_rir_limit;
pvt->info.sad_limit = sad_limit;
pvt->info.interleave_mode = interleave_mode;
@@ -3245,6 +3350,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
pvt->info.dram_rule = ibridge_dram_rule;
pvt->info.get_memory_type = haswell_get_memory_type;
pvt->info.get_node_id = haswell_get_node_id;
+ pvt->info.get_ha = ibridge_get_ha;
pvt->info.rir_limit = haswell_rir_limit;
pvt->info.sad_limit = sad_limit;
pvt->info.interleave_mode = interleave_mode;
@@ -3269,6 +3375,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
pvt->info.dram_rule = knl_dram_rule;
pvt->info.get_memory_type = knl_get_memory_type;
pvt->info.get_node_id = knl_get_node_id;
+ pvt->info.get_ha = knl_get_ha;
pvt->info.rir_limit = NULL;
pvt->info.sad_limit = knl_sad_limit;
pvt->info.interleave_mode = knl_interleave_mode;
@@ -3320,17 +3427,14 @@ fail0:
return rc;
}
-#define ICPU(model, table) \
- { X86_VENDOR_INTEL, 6, model, 0, (unsigned long)&table }
-
static const struct x86_cpu_id sbridge_cpuids[] = {
- ICPU(INTEL_FAM6_SANDYBRIDGE_X, pci_dev_descr_sbridge_table),
- ICPU(INTEL_FAM6_IVYBRIDGE_X, pci_dev_descr_ibridge_table),
- ICPU(INTEL_FAM6_HASWELL_X, pci_dev_descr_haswell_table),
- ICPU(INTEL_FAM6_BROADWELL_X, pci_dev_descr_broadwell_table),
- ICPU(INTEL_FAM6_BROADWELL_XEON_D, pci_dev_descr_broadwell_table),
- ICPU(INTEL_FAM6_XEON_PHI_KNL, pci_dev_descr_knl_table),
- ICPU(INTEL_FAM6_XEON_PHI_KNM, pci_dev_descr_knl_table),
+ INTEL_CPU_FAM6(SANDYBRIDGE_X, pci_dev_descr_sbridge_table),
+ INTEL_CPU_FAM6(IVYBRIDGE_X, pci_dev_descr_ibridge_table),
+ INTEL_CPU_FAM6(HASWELL_X, pci_dev_descr_haswell_table),
+ INTEL_CPU_FAM6(BROADWELL_X, pci_dev_descr_broadwell_table),
+ INTEL_CPU_FAM6(BROADWELL_XEON_D, pci_dev_descr_broadwell_table),
+ INTEL_CPU_FAM6(XEON_PHI_KNL, pci_dev_descr_knl_table),
+ INTEL_CPU_FAM6(XEON_PHI_KNM, pci_dev_descr_knl_table),
{ }
};
MODULE_DEVICE_TABLE(x86cpu, sbridge_cpuids);