summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-12-12 13:55:31 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2022-12-12 13:55:31 -0800
commitc1f0fcd85d3d66f002fc1a4986363840fcca766d (patch)
tree414ad8ea3b38a33585de78686528d4c99e89598e /include
parent691806e977a3a64895bd891878ed726cdbd282c0 (diff)
parentf04facfb993de47e2133b2b842d72b97b1c50162 (diff)
downloadlinux-c1f0fcd85d3d66f002fc1a4986363840fcca766d.tar.bz2
Merge tag 'cxl-for-6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl
Pull cxl updates from Dan Williams: "Compute Express Link (CXL) updates for 6.2. While it may seem backwards, the CXL update this time around includes some focus on CXL 1.x enabling where the work to date had been with CXL 2.0 (VH topologies) in mind. First generation CXL can mostly be supported via BIOS, similar to DDR, however it became clear there are use cases for OS native CXL error handling and some CXL 3.0 endpoint features can be deployed on CXL 1.x hosts (Restricted CXL Host (RCH) topologies). So, this update brings RCH topologies into the Linux CXL device model. In support of the ongoing CXL 2.0+ enabling two new core kernel facilities are added. One is the ability for the kernel to flag collisions between userspace access to PCI configuration registers and kernel accesses. This is brought on by the PCIe Data-Object-Exchange (DOE) facility, a hardware mailbox over config-cycles. The other is a cpu_cache_invalidate_memregion() API that maps to wbinvd_on_all_cpus() on x86. To prevent abuse it is disabled in guest VMs and architectures that do not support it yet. The CXL paths that need it, dynamic memory region creation and security commands (erase / unlock), are disabled when it is not present. As for the CXL 2.0+ this cycle the subsystem gains support Persistent Memory Security commands, error handling in response to PCIe AER notifications, and support for the "XOR" host bridge interleave algorithm. Summary: - Add the cpu_cache_invalidate_memregion() API for cache flushing in response to physical memory reconfiguration, or memory-side data invalidation from operations like secure erase or memory-device unlock. - Add a facility for the kernel to warn about collisions between kernel and userspace access to PCI configuration registers - Add support for Restricted CXL Host (RCH) topologies (formerly CXL 1.1) - Add handling and reporting of CXL errors reported via the PCIe AER mechanism - Add support for CXL Persistent Memory Security commands - Add support for the "XOR" algorithm for CXL host bridge interleave - Rework / simplify CXL to NVDIMM interactions - Miscellaneous cleanups and fixes" * tag 'cxl-for-6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl: (71 commits) cxl/region: Fix memdev reuse check cxl/pci: Remove endian confusion cxl/pci: Add some type-safety to the AER trace points cxl/security: Drop security command ioctl uapi cxl/mbox: Add variable output size validation for internal commands cxl/mbox: Enable cxl_mbox_send_cmd() users to validate output size cxl/security: Fix Get Security State output payload endian handling cxl: update names for interleave ways conversion macros cxl: update names for interleave granularity conversion macros cxl/acpi: Warn about an invalid CHBCR in an existing CHBS entry tools/testing/cxl: Require cache invalidation bypass cxl/acpi: Fail decoder add if CXIMS for HBIG is missing cxl/region: Fix spelling mistake "memergion" -> "memregion" cxl/regs: Fix sparse warning cxl/acpi: Set ACPI's CXL _OSC to indicate RCD mode support tools/testing/cxl: Add an RCH topology cxl/port: Add RCD endpoint port enumeration cxl/mem: Move devm_cxl_add_endpoint() from cxl_core to cxl_mem tools/testing/cxl: Add XOR Math support to cxl_test cxl/acpi: Support CXL XOR Interleave Math (CXIMS) ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/ioport.h2
-rw-r--r--include/linux/libnvdimm.h7
-rw-r--r--include/linux/memregion.h38
-rw-r--r--include/linux/pci.h20
-rw-r--r--include/trace/events/cxl.h112
-rw-r--r--include/uapi/linux/pci_regs.h1
6 files changed, 180 insertions, 0 deletions
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 27642ca15d93..4ae3c541ea6f 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -318,6 +318,8 @@ extern void __devm_release_region(struct device *dev, struct resource *parent,
resource_size_t start, resource_size_t n);
extern int iomem_map_sanity_check(resource_size_t addr, unsigned long size);
extern bool iomem_is_exclusive(u64 addr);
+extern bool resource_is_exclusive(struct resource *resource, u64 addr,
+ resource_size_t size);
extern int
walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index c74acfa1a3fe..af38252ad704 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -35,6 +35,11 @@ enum {
NDD_WORK_PENDING = 4,
/* dimm supports namespace labels */
NDD_LABELING = 6,
+ /*
+ * dimm contents have changed requiring invalidation of CPU caches prior
+ * to activation of a region that includes this device
+ */
+ NDD_INCOHERENT = 7,
/* need to set a limit somewhere, but yes, this is likely overkill */
ND_IOCTL_MAX_BUFLEN = SZ_4M,
@@ -183,6 +188,8 @@ struct nvdimm_security_ops {
int (*overwrite)(struct nvdimm *nvdimm,
const struct nvdimm_key_data *key_data);
int (*query_overwrite)(struct nvdimm *nvdimm);
+ int (*disable_master)(struct nvdimm *nvdimm,
+ const struct nvdimm_key_data *key_data);
};
enum nvdimm_fwa_state {
diff --git a/include/linux/memregion.h b/include/linux/memregion.h
index c04c4fd2e209..bf83363807ac 100644
--- a/include/linux/memregion.h
+++ b/include/linux/memregion.h
@@ -3,6 +3,7 @@
#define _MEMREGION_H_
#include <linux/types.h>
#include <linux/errno.h>
+#include <linux/bug.h>
struct memregion_info {
int target_node;
@@ -20,4 +21,41 @@ static inline void memregion_free(int id)
{
}
#endif
+
+/**
+ * cpu_cache_invalidate_memregion - drop any CPU cached data for
+ * memregions described by @res_desc
+ * @res_desc: one of the IORES_DESC_* types
+ *
+ * Perform cache maintenance after a memory event / operation that
+ * changes the contents of physical memory in a cache-incoherent manner.
+ * For example, device memory technologies like NVDIMM and CXL have
+ * device secure erase, and dynamic region provision that can replace
+ * the memory mapped to a given physical address.
+ *
+ * Limit the functionality to architectures that have an efficient way
+ * to writeback and invalidate potentially terabytes of address space at
+ * once. Note that this routine may or may not write back any dirty
+ * contents while performing the invalidation. It is only exported for
+ * the explicit usage of the NVDIMM and CXL modules in the 'DEVMEM'
+ * symbol namespace on bare platforms.
+ *
+ * Returns 0 on success or negative error code on a failure to perform
+ * the cache maintenance.
+ */
+#ifdef CONFIG_ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION
+int cpu_cache_invalidate_memregion(int res_desc);
+bool cpu_cache_has_invalidate_memregion(void);
+#else
+static inline bool cpu_cache_has_invalidate_memregion(void)
+{
+ return false;
+}
+
+static inline int cpu_cache_invalidate_memregion(int res_desc)
+{
+ WARN_ON_ONCE("CPU cache invalidation required");
+ return -ENXIO;
+}
+#endif
#endif /* _MEMREGION_H_ */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index aa514b54c681..c0d939f3169c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -410,6 +410,7 @@ struct pci_dev {
*/
unsigned int irq;
struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
+ struct resource driver_exclusive_resource; /* driver exclusive resource ranges */
bool match_driver; /* Skip attaching driver */
@@ -844,6 +845,9 @@ struct pci_error_handlers {
/* Device driver may resume normal operations */
void (*resume)(struct pci_dev *dev);
+
+ /* Allow device driver to record more details of a correctable error */
+ void (*cor_error_detected)(struct pci_dev *dev);
};
@@ -1408,6 +1412,21 @@ int pci_request_selected_regions(struct pci_dev *, int, const char *);
int pci_request_selected_regions_exclusive(struct pci_dev *, int, const char *);
void pci_release_selected_regions(struct pci_dev *, int);
+static inline __must_check struct resource *
+pci_request_config_region_exclusive(struct pci_dev *pdev, unsigned int offset,
+ unsigned int len, const char *name)
+{
+ return __request_region(&pdev->driver_exclusive_resource, offset, len,
+ name, IORESOURCE_EXCLUSIVE);
+}
+
+static inline void pci_release_config_region(struct pci_dev *pdev,
+ unsigned int offset,
+ unsigned int len)
+{
+ __release_region(&pdev->driver_exclusive_resource, offset, len);
+}
+
/* drivers/pci/bus.c */
void pci_add_resource(struct list_head *resources, struct resource *res);
void pci_add_resource_offset(struct list_head *resources, struct resource *res,
@@ -2505,6 +2524,7 @@ void pci_ims_free_irq(struct pci_dev *pdev, struct msi_map map);
#define pci_crit(pdev, fmt, arg...) dev_crit(&(pdev)->dev, fmt, ##arg)
#define pci_err(pdev, fmt, arg...) dev_err(&(pdev)->dev, fmt, ##arg)
#define pci_warn(pdev, fmt, arg...) dev_warn(&(pdev)->dev, fmt, ##arg)
+#define pci_warn_once(pdev, fmt, arg...) dev_warn_once(&(pdev)->dev, fmt, ##arg)
#define pci_notice(pdev, fmt, arg...) dev_notice(&(pdev)->dev, fmt, ##arg)
#define pci_info(pdev, fmt, arg...) dev_info(&(pdev)->dev, fmt, ##arg)
#define pci_dbg(pdev, fmt, arg...) dev_dbg(&(pdev)->dev, fmt, ##arg)
diff --git a/include/trace/events/cxl.h b/include/trace/events/cxl.h
new file mode 100644
index 000000000000..ad085a2534ef
--- /dev/null
+++ b/include/trace/events/cxl.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM cxl
+
+#if !defined(_CXL_EVENTS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _CXL_EVENTS_H
+
+#include <linux/tracepoint.h>
+
+#define CXL_HEADERLOG_SIZE SZ_512
+#define CXL_HEADERLOG_SIZE_U32 SZ_512 / sizeof(u32)
+
+#define CXL_RAS_UC_CACHE_DATA_PARITY BIT(0)
+#define CXL_RAS_UC_CACHE_ADDR_PARITY BIT(1)
+#define CXL_RAS_UC_CACHE_BE_PARITY BIT(2)
+#define CXL_RAS_UC_CACHE_DATA_ECC BIT(3)
+#define CXL_RAS_UC_MEM_DATA_PARITY BIT(4)
+#define CXL_RAS_UC_MEM_ADDR_PARITY BIT(5)
+#define CXL_RAS_UC_MEM_BE_PARITY BIT(6)
+#define CXL_RAS_UC_MEM_DATA_ECC BIT(7)
+#define CXL_RAS_UC_REINIT_THRESH BIT(8)
+#define CXL_RAS_UC_RSVD_ENCODE BIT(9)
+#define CXL_RAS_UC_POISON BIT(10)
+#define CXL_RAS_UC_RECV_OVERFLOW BIT(11)
+#define CXL_RAS_UC_INTERNAL_ERR BIT(14)
+#define CXL_RAS_UC_IDE_TX_ERR BIT(15)
+#define CXL_RAS_UC_IDE_RX_ERR BIT(16)
+
+#define show_uc_errs(status) __print_flags(status, " | ", \
+ { CXL_RAS_UC_CACHE_DATA_PARITY, "Cache Data Parity Error" }, \
+ { CXL_RAS_UC_CACHE_ADDR_PARITY, "Cache Address Parity Error" }, \
+ { CXL_RAS_UC_CACHE_BE_PARITY, "Cache Byte Enable Parity Error" }, \
+ { CXL_RAS_UC_CACHE_DATA_ECC, "Cache Data ECC Error" }, \
+ { CXL_RAS_UC_MEM_DATA_PARITY, "Memory Data Parity Error" }, \
+ { CXL_RAS_UC_MEM_ADDR_PARITY, "Memory Address Parity Error" }, \
+ { CXL_RAS_UC_MEM_BE_PARITY, "Memory Byte Enable Parity Error" }, \
+ { CXL_RAS_UC_MEM_DATA_ECC, "Memory Data ECC Error" }, \
+ { CXL_RAS_UC_REINIT_THRESH, "REINIT Threshold Hit" }, \
+ { CXL_RAS_UC_RSVD_ENCODE, "Received Unrecognized Encoding" }, \
+ { CXL_RAS_UC_POISON, "Received Poison From Peer" }, \
+ { CXL_RAS_UC_RECV_OVERFLOW, "Receiver Overflow" }, \
+ { CXL_RAS_UC_INTERNAL_ERR, "Component Specific Error" }, \
+ { CXL_RAS_UC_IDE_TX_ERR, "IDE Tx Error" }, \
+ { CXL_RAS_UC_IDE_RX_ERR, "IDE Rx Error" } \
+)
+
+TRACE_EVENT(cxl_aer_uncorrectable_error,
+ TP_PROTO(const struct device *dev, u32 status, u32 fe, u32 *hl),
+ TP_ARGS(dev, status, fe, hl),
+ TP_STRUCT__entry(
+ __string(dev_name, dev_name(dev))
+ __field(u32, status)
+ __field(u32, first_error)
+ __array(u32, header_log, CXL_HEADERLOG_SIZE_U32)
+ ),
+ TP_fast_assign(
+ __assign_str(dev_name, dev_name(dev));
+ __entry->status = status;
+ __entry->first_error = fe;
+ /*
+ * Embed the 512B headerlog data for user app retrieval and
+ * parsing, but no need to print this in the trace buffer.
+ */
+ memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE);
+ ),
+ TP_printk("%s: status: '%s' first_error: '%s'",
+ __get_str(dev_name),
+ show_uc_errs(__entry->status),
+ show_uc_errs(__entry->first_error)
+ )
+);
+
+#define CXL_RAS_CE_CACHE_DATA_ECC BIT(0)
+#define CXL_RAS_CE_MEM_DATA_ECC BIT(1)
+#define CXL_RAS_CE_CRC_THRESH BIT(2)
+#define CLX_RAS_CE_RETRY_THRESH BIT(3)
+#define CXL_RAS_CE_CACHE_POISON BIT(4)
+#define CXL_RAS_CE_MEM_POISON BIT(5)
+#define CXL_RAS_CE_PHYS_LAYER_ERR BIT(6)
+
+#define show_ce_errs(status) __print_flags(status, " | ", \
+ { CXL_RAS_CE_CACHE_DATA_ECC, "Cache Data ECC Error" }, \
+ { CXL_RAS_CE_MEM_DATA_ECC, "Memory Data ECC Error" }, \
+ { CXL_RAS_CE_CRC_THRESH, "CRC Threshold Hit" }, \
+ { CLX_RAS_CE_RETRY_THRESH, "Retry Threshold" }, \
+ { CXL_RAS_CE_CACHE_POISON, "Received Cache Poison From Peer" }, \
+ { CXL_RAS_CE_MEM_POISON, "Received Memory Poison From Peer" }, \
+ { CXL_RAS_CE_PHYS_LAYER_ERR, "Received Error From Physical Layer" } \
+)
+
+TRACE_EVENT(cxl_aer_correctable_error,
+ TP_PROTO(const struct device *dev, u32 status),
+ TP_ARGS(dev, status),
+ TP_STRUCT__entry(
+ __string(dev_name, dev_name(dev))
+ __field(u32, status)
+ ),
+ TP_fast_assign(
+ __assign_str(dev_name, dev_name(dev));
+ __entry->status = status;
+ ),
+ TP_printk("%s: status: '%s'",
+ __get_str(dev_name), show_ce_errs(__entry->status)
+ )
+);
+
+#endif /* _CXL_EVENTS_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE cxl
+#include <trace/define_trace.h>
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 57b8e2ffb1dd..82a03ea954af 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -1119,6 +1119,7 @@
#define PCI_DOE_STATUS_DATA_OBJECT_READY 0x80000000 /* Data Object Ready */
#define PCI_DOE_WRITE 0x10 /* DOE Write Data Mailbox Register */
#define PCI_DOE_READ 0x14 /* DOE Read Data Mailbox Register */
+#define PCI_DOE_CAP_SIZEOF 0x18 /* Size of DOE register block */
/* DOE Data Object - note not actually registers */
#define PCI_DOE_DATA_OBJECT_HEADER_1_VID 0x0000ffff