From 0b892c717714334890ea179a2dc1941a223e446f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sat, 29 Oct 2016 09:56:00 -0200 Subject: edac: move EDAC PCI definitions to drivers/edac/edac_pci.h The edac_core.h header contains data structures and function definitions for the 3 parts of EDAC: MC, PCI and device. Let's move the PCI ones to a separate header file, as part of a header reorganization. Signed-off-by: Mauro Carvalho Chehab --- include/linux/edac.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/edac.h b/include/linux/edac.h index cb56dcba68c6..0fec0f26469b 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -18,6 +18,8 @@ #include #include +#define EDAC_DEVICE_NAME_LEN 31 + struct device; #define EDAC_OPSTATE_INVAL -1 -- cgit v1.2.3 From e002075819d987dec3bf9fa3ca98ad19fa86ae0f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 28 Oct 2016 15:04:52 -0200 Subject: edac: fix kernel-doc markups at edac.h As this file was never added to the driver-api, the kernel-doc markups there were never tested. Some of them have issues. Fix them. Signed-off-by: Mauro Carvalho Chehab --- include/linux/edac.h | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/edac.h b/include/linux/edac.h index 0fec0f26469b..3c20d2d2686a 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -132,6 +132,8 @@ enum dev_type { * it for example, by re-trying the operation). * @HW_EVENT_ERR_FATAL: Fatal Error - Uncorrected error that could not * be recovered. + * @HW_EVENT_ERR_INFO: Informational - The CPER spec defines a fourth + * type of error: informational logs. */ enum hw_event_mc_err_type { HW_EVENT_ERR_CORRECTED, @@ -162,7 +164,7 @@ static inline char *mc_event_error_type(const unsigned int err_type) * enum mem_type - memory types. 
For a more detailed reference, please see * http://en.wikipedia.org/wiki/DRAM * - * @MEM_EMPTY Empty csrow + * @MEM_EMPTY: Empty csrow * @MEM_RESERVED: Reserved csrow type * @MEM_UNKNOWN: Unknown csrow type * @MEM_FPM: FPM - Fast Page Mode, used on systems up to 1995. @@ -286,7 +288,7 @@ enum edac_type { /** * enum scrub_type - scrubbing capabilities - * @SCRUB_UNKNOWN Unknown if scrubber is available + * @SCRUB_UNKNOWN: Unknown if scrubber is available * @SCRUB_NONE: No scrubber * @SCRUB_SW_PROG: SW progressive (sequential) scrubbing * @SCRUB_SW_SRC: Software scrub only errors @@ -295,7 +297,7 @@ enum edac_type { * @SCRUB_HW_PROG: HW progressive (sequential) scrubbing * @SCRUB_HW_SRC: Hardware scrub only errors * @SCRUB_HW_PROG_SRC: Progressive hardware scrub from an error - * SCRUB_HW_TUNABLE: Hardware scrub frequency is tunable + * @SCRUB_HW_TUNABLE: Hardware scrub frequency is tunable */ enum scrub_type { SCRUB_UNKNOWN = 0, @@ -460,7 +462,7 @@ enum edac_mc_layer_type { /** * struct edac_mc_layer - describes the memory controller hierarchy - * @layer: layer type + * @type: layer type * @size: number of components per layer. For example, * if the channel layer has two channels, size = 2 * @is_virt_csrow: This layer is part of the "csrow" when old API @@ -483,24 +485,28 @@ struct edac_mc_layer { #define EDAC_MAX_LAYERS 3 /** - * EDAC_DIMM_OFF - Macro responsible to get a pointer offset inside a pointer array - * for the element given by [layer0,layer1,layer2] position + * EDAC_DIMM_OFF - Macro responsible to get a pointer offset inside a pointer + * array for the element given by [layer0,layer1,layer2] + * position * * @layers: a struct edac_mc_layer array, describing how many elements * were allocated for each layer - * @n_layers: Number of layers at the @layers array + * @nlayers: Number of layers at the @layers array * @layer0: layer0 position * @layer1: layer1 position. Unused if n_layers < 2 * @layer2: layer2 position. 
Unused if n_layers < 3 * - * For 1 layer, this macro returns &var[layer0] - &var + * For 1 layer, this macro returns "var[layer0] - var"; + * * For 2 layers, this macro is similar to allocate a bi-dimensional array - * and to return "&var[layer0][layer1] - &var" + * and to return "var[layer0][layer1] - var"; + * * For 3 layers, this macro is similar to allocate a tri-dimensional array - * and to return "&var[layer0][layer1][layer2] - &var" + * and to return "var[layer0][layer1][layer2] - var". * * A loop could be used here to make it more generic, but, as we only have * 3 layers, this is a little faster. + * * By design, layers can never be 0 or more than 3. If that ever happens, * a NULL is returned, causing an OOPS during the memory allocation routine, * with would point to the developer that he's doing something wrong. @@ -527,16 +533,18 @@ struct edac_mc_layer { * were allocated for each layer * @var: name of the var where we want to get the pointer * (like mci->dimms) - * @n_layers: Number of layers at the @layers array + * @nlayers: Number of layers at the @layers array * @layer0: layer0 position * @layer1: layer1 position. Unused if n_layers < 2 * @layer2: layer2 position. 
Unused if n_layers < 3 * - * For 1 layer, this macro returns &var[layer0] + * For 1 layer, this macro returns "var[layer0]"; + * * For 2 layers, this macro is similar to allocate a bi-dimensional array - * and to return "&var[layer0][layer1]" + * and to return "var[layer0][layer1]"; + * * For 3 layers, this macro is similar to allocate a tri-dimensional array - * and to return "&var[layer0][layer1][layer2]" + * and to return "var[layer0][layer1][layer2]"; */ #define EDAC_DIMM_PTR(layers, var, nlayers, layer0, layer1, layer2) ({ \ typeof(*var) __p; \ @@ -622,7 +630,7 @@ struct errcount_attribute_data { }; /** - * edac_raw_error_desc - Raw error report structure + * struct edac_raw_error_desc - Raw error report structure * @grain: minimum granularity for an error report, in bytes * @error_count: number of errors of the same type * @top_layer: top layer of the error (layer[0]) -- cgit v1.2.3 From 6b1fb6f7037221981fb2cf1822c31b5fba1b9c22 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sat, 29 Oct 2016 16:13:23 -0200 Subject: edac.rst: move concepts dictionary from edac.h Instead of storing the concepts dictionary inside header file, move it to the subsystem documentation. 
Signed-off-by: Mauro Carvalho Chehab --- Documentation/driver-api/edac.rst | 106 +++++++++++++++++++++++++++++++++++++ include/linux/edac.h | 108 -------------------------------------- 2 files changed, 106 insertions(+), 108 deletions(-) (limited to 'include') diff --git a/Documentation/driver-api/edac.rst b/Documentation/driver-api/edac.rst index 3771e768fda1..b8c742aa0a71 100644 --- a/Documentation/driver-api/edac.rst +++ b/Documentation/driver-api/edac.rst @@ -1,6 +1,112 @@ Error Detection And Correction (EDAC) Devices ============================================= +Main Concepts used at the EDAC subsystem +---------------------------------------- + +There are several things to be aware of that aren't at all obvious, like +*sockets*, *socket sets*, *banks*, *rows*, *chip-select rows*, *channels*, +etc... + +These are some of the many terms that are thrown about that don't always +mean what people think they mean (Inconceivable!). In the interest of +creating a common ground for discussion, terms and their definitions +will be established. + +* Memory devices + +The individual DRAM chips on a memory stick. These devices commonly +output 4 and 8 bits each (x4, x8). Grouping several of these in parallel +provides the number of bits that the memory controller expects: +typically 72 bits, in order to provide 64 bits + 8 bits of ECC data. + +* Memory Stick + +A printed circuit board that aggregates multiple memory devices in +parallel. In general, this is the Field Replaceable Unit (FRU) which +gets replaced, in the case of excessive errors. Most often it is also +called DIMM (Dual Inline Memory Module). + +* Memory Socket + +A physical connector on the motherboard that accepts a single memory +stick. Also called as "slot" on several datasheets. + +* Channel + +A memory controller channel, responsible to communicate with a group of +DIMMs. Each channel has its own independent control (command) and data +bus, and can be used independently or grouped with other channels. 
+ +* Branch + +It is typically the highest hierarchy on a Fully-Buffered DIMM memory +controller. Typically, it contains two channels. Two channels at the +same branch can be used in single mode or in lockstep mode. When +lockstep is enabled, the cacheline is doubled, but it generally brings +some performance penalty. Also, it is generally not possible to point to +just one memory stick when an error occurs, as the error correction code +is calculated using two DIMMs instead of one. Due to that, it is capable +of correcting more errors than on single mode. + +* Single-channel + +The data accessed by the memory controller is contained into one dimm +only. E. g. if the data is 64 bits-wide, the data flows to the CPU using +one 64 bits parallel access. Typically used with SDR, DDR, DDR2 and DDR3 +memories. FB-DIMM and RAMBUS use a different concept for channel, so +this concept doesn't apply there. + +* Double-channel + +The data size accessed by the memory controller is interlaced into two +dimms, accessed at the same time. E. g. if the DIMM is 64 bits-wide (72 +bits with ECC), the data flows to the CPU using a 128 bits parallel +access. + +* Chip-select row + +This is the name of the DRAM signal used to select the DRAM ranks to be +accessed. Common chip-select rows for single channel are 64 bits, for +dual channel 128 bits. It may not be visible by the memory controller, +as some DIMM types have a memory buffer that can hide direct access to +it from the Memory Controller. + +* Single-Ranked stick + +A Single-ranked stick has 1 chip-select row of memory. Motherboards +commonly drive two chip-select pins to a memory stick. A single-ranked +stick, will occupy only one of those rows. The other will be unused. + +.. _doubleranked: + +* Double-Ranked stick + +A double-ranked stick has two chip-select rows which access different +sets of memory devices. The two rows cannot be accessed concurrently. 
+ +* Double-sided stick + +**DEPRECATED TERM**, see :ref:`Double-Ranked stick <doubleranked>`. + +A double-sided stick has two chip-select rows which access different sets +of memory devices. The two rows cannot be accessed concurrently. +"Double-sided" is irrespective of the memory devices being mounted on +both sides of the memory stick. + +* Socket set + +All of the memory sticks that are required for a single memory access or +all of the memory sticks spanned by a chip-select row. A single socket +set has two chip-select rows and if double-sided sticks are used these +will occupy those chip-select rows. + +* Bank + +This term is avoided because it is unclear when needing to distinguish +between chip-select rows and socket sets. + + Memory Controllers ------------------ diff --git a/include/linux/edac.h b/include/linux/edac.h index 3c20d2d2686a..c4433fd6c859 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -330,114 +330,6 @@ enum scrub_type { #define OP_RUNNING_POLL_INTR 0x203 #define OP_OFFLINE 0x300 -/* - * Concepts used at the EDAC subsystem - * - * There are several things to be aware of that aren't at all obvious: - * - * SOCKETS, SOCKET SETS, BANKS, ROWS, CHIP-SELECT ROWS, CHANNELS, etc.. - * - * These are some of the many terms that are thrown about that don't always - * mean what people think they mean (Inconceivable!). In the interest of - * creating a common ground for discussion, terms and their definitions - * will be established. - * - * Memory devices: The individual DRAM chips on a memory stick. These - * devices commonly output 4 and 8 bits each (x4, x8). - * Grouping several of these in parallel provides the - * number of bits that the memory controller expects: - * typically 72 bits, in order to provide 64 bits + - * 8 bits of ECC data. - * - * Memory Stick: A printed circuit board that aggregates multiple - * memory devices in parallel. 
In general, this is the - * Field Replaceable Unit (FRU) which gets replaced, in - * the case of excessive errors. Most often it is also - * called DIMM (Dual Inline Memory Module). - * - * Memory Socket: A physical connector on the motherboard that accepts - * a single memory stick. Also called as "slot" on several - * datasheets. - * - * Channel: A memory controller channel, responsible to communicate - * with a group of DIMMs. Each channel has its own - * independent control (command) and data bus, and can - * be used independently or grouped with other channels. - * - * Branch: It is typically the highest hierarchy on a - * Fully-Buffered DIMM memory controller. - * Typically, it contains two channels. - * Two channels at the same branch can be used in single - * mode or in lockstep mode. - * When lockstep is enabled, the cacheline is doubled, - * but it generally brings some performance penalty. - * Also, it is generally not possible to point to just one - * memory stick when an error occurs, as the error - * correction code is calculated using two DIMMs instead - * of one. Due to that, it is capable of correcting more - * errors than on single mode. - * - * Single-channel: The data accessed by the memory controller is contained - * into one dimm only. E. g. if the data is 64 bits-wide, - * the data flows to the CPU using one 64 bits parallel - * access. - * Typically used with SDR, DDR, DDR2 and DDR3 memories. - * FB-DIMM and RAMBUS use a different concept for channel, - * so this concept doesn't apply there. - * - * Double-channel: The data size accessed by the memory controller is - * interlaced into two dimms, accessed at the same time. - * E. g. if the DIMM is 64 bits-wide (72 bits with ECC), - * the data flows to the CPU using a 128 bits parallel - * access. - * - * Chip-select row: This is the name of the DRAM signal used to select the - * DRAM ranks to be accessed. Common chip-select rows for - * single channel are 64 bits, for dual channel 128 bits. 
- * It may not be visible by the memory controller, as some - * DIMM types have a memory buffer that can hide direct - * access to it from the Memory Controller. - * - * Single-Ranked stick: A Single-ranked stick has 1 chip-select row of memory. - * Motherboards commonly drive two chip-select pins to - * a memory stick. A single-ranked stick, will occupy - * only one of those rows. The other will be unused. - * - * Double-Ranked stick: A double-ranked stick has two chip-select rows which - * access different sets of memory devices. The two - * rows cannot be accessed concurrently. - * - * Double-sided stick: DEPRECATED TERM, see Double-Ranked stick. - * A double-sided stick has two chip-select rows which - * access different sets of memory devices. The two - * rows cannot be accessed concurrently. "Double-sided" - * is irrespective of the memory devices being mounted - * on both sides of the memory stick. - * - * Socket set: All of the memory sticks that are required for - * a single memory access or all of the memory sticks - * spanned by a chip-select row. A single socket set - * has two chip-select rows and if double-sided sticks - * are used these will occupy those chip-select rows. - * - * Bank: This term is avoided because it is unclear when - * needing to distinguish between chip-select rows and - * socket sets. - * - * Controller pages: - * - * Physical pages: - * - * Virtual pages: - * - * - * STRUCTURE ORGANIZATION AND CHOICES - * - * - * - * PS - I enjoyed writing all that about as much as you enjoyed reading it. - */ - /** * enum edac_mc_layer - memory controller hierarchy layer * -- cgit v1.2.3 From 4838a0def07f5611347860b1fc0129c3fe77cc02 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 1 Dec 2016 14:24:53 -0600 Subject: EDAC: Document HW_EVENT_ERR_DEFERRED type Add a description of the HW_EVENT_ERR_DEFERRED type that wasn't included with commit d12a969ebbfc ("EDAC, amd64: Add Deferred Error type"). 
Signed-off-by: Yazen Ghannam Acked-by: Borislav Petkov Signed-off-by: Mauro Carvalho Chehab --- include/linux/edac.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/edac.h b/include/linux/edac.h index c4433fd6c859..07c52c0af62d 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -130,6 +130,12 @@ enum dev_type { * fatal (maybe it is on an unused memory area, * or the memory controller could recover from * it for example, by re-trying the operation). + * @HW_EVENT_ERR_DEFERRED: Deferred Error - Indicates an uncorrectable + * error whose handling is not urgent. This could + * be due to hardware data poisoning where the + * system can continue operation until the poisoned + * data is consumed. Preemptive measures may also + * be taken, e.g. offlining pages, etc. * @HW_EVENT_ERR_FATAL: Fatal Error - Uncorrected error that could not * be recovered. * @HW_EVENT_ERR_INFO: Informational - The CPER spec defines a fourth -- cgit v1.2.3