diff options
author | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2019-11-26 10:31:02 +0100 |
---|---|---|
committer | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2019-11-26 10:31:02 +0100 |
commit | 782b59711e1561ee0da06bc478ca5e8249aa8d09 (patch) | |
tree | a03ba1d7b9212480e707045619ae493d3be7a084 /drivers | |
parent | 995e2ef08280dab8de6c517acd836613678fe2a3 (diff) | |
parent | 0f1839d0888700389e3062b4787046d61780d6d9 (diff) | |
download | linux-782b59711e1561ee0da06bc478ca5e8249aa8d09.tar.bz2 |
Merge branch 'acpi-mm'
* acpi-mm:
ACPI: HMAT: use %u instead of %d to print u32 values
ACPI: NUMA: HMAT: fix a section mismatch
ACPI: HMAT: don't mix pxm and nid when setting memory target processor_pxm
ACPI: NUMA: HMAT: Register "soft reserved" memory as an "hmem" device
ACPI: NUMA: HMAT: Register HMAT at device_initcall level
device-dax: Add a driver for "hmem" devices
dax: Fix alloc_dax_region() compile warning
lib: Uplevel the pmem "region" ida to a global allocator
x86/efi: Add efi_fake_mem support for EFI_MEMORY_SP
arm/efi: EFI soft reservation to memblock
x86/efi: EFI soft reservation to E820 enumeration
efi: Common enable/disable infrastructure for EFI soft reservation
x86/efi: Push EFI_MEMMAP check into leaf routines
efi: Enumerate EFI_MEMORY_SP
ACPI: NUMA: Establish a new drivers/acpi/numa/ directory
Diffstat (limited to 'drivers')
29 files changed, 426 insertions, 71 deletions
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 0f23d8b22c42..4fb97511a16f 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -319,12 +319,6 @@ config ACPI_THERMAL To compile this driver as a module, choose M here: the module will be called thermal. -config ACPI_NUMA - bool "NUMA support" - depends on NUMA - depends on (X86 || IA64 || ARM64) - default y if IA64 || ARM64 - config ACPI_CUSTOM_DSDT_FILE string "Custom DSDT Table file to include" default "" @@ -473,8 +467,7 @@ config ACPI_REDUCED_HARDWARE_ONLY If you are unsure what to do, do not enable this option. source "drivers/acpi/nfit/Kconfig" -source "drivers/acpi/hmat/Kconfig" - +source "drivers/acpi/numa/Kconfig" source "drivers/acpi/apei/Kconfig" source "drivers/acpi/dptf/Kconfig" diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 61d31c36ed8a..33fdaf67454e 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -55,7 +55,6 @@ acpi-$(CONFIG_X86) += acpi_cmos_rtc.o acpi-$(CONFIG_X86) += x86/apple.o acpi-$(CONFIG_X86) += x86/utils.o acpi-$(CONFIG_DEBUG_FS) += debugfs.o -acpi-$(CONFIG_ACPI_NUMA) += numa.o acpi-$(CONFIG_ACPI_PROCFS_POWER) += cm_sbs.o acpi-y += acpi_lpat.o acpi-$(CONFIG_ACPI_LPIT) += acpi_lpit.o @@ -80,7 +79,7 @@ obj-$(CONFIG_ACPI_PROCESSOR) += processor.o obj-$(CONFIG_ACPI) += container.o obj-$(CONFIG_ACPI_THERMAL) += thermal.o obj-$(CONFIG_ACPI_NFIT) += nfit/ -obj-$(CONFIG_ACPI_HMAT) += hmat/ +obj-$(CONFIG_ACPI_NUMA) += numa/ obj-$(CONFIG_ACPI) += acpi_memhotplug.o obj-$(CONFIG_ACPI_HOTPLUG_IOAPIC) += ioapic.o obj-$(CONFIG_ACPI_BATTERY) += battery.o diff --git a/drivers/acpi/hmat/Makefile b/drivers/acpi/hmat/Makefile deleted file mode 100644 index 1c20ef36a385..000000000000 --- a/drivers/acpi/hmat/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_ACPI_HMAT) := hmat.o diff --git a/drivers/acpi/hmat/Kconfig b/drivers/acpi/numa/Kconfig index 95a29964dbea..fcf2e556d69d 100644 --- a/drivers/acpi/hmat/Kconfig +++ b/drivers/acpi/numa/Kconfig @@ -1,8 +1,15 @@ # SPDX-License-Identifier: GPL-2.0 +config ACPI_NUMA + bool "NUMA support" + depends on NUMA + depends on (X86 || IA64 || ARM64) + default y if IA64 || ARM64 + config ACPI_HMAT bool "ACPI Heterogeneous Memory Attribute Table Support" depends on ACPI_NUMA select HMEM_REPORTING + select MEMREGION help If set, this option has the kernel parse and report the platform's ACPI HMAT (Heterogeneous Memory Attributes Table), diff --git a/drivers/acpi/numa/Makefile b/drivers/acpi/numa/Makefile new file mode 100644 index 000000000000..517a6c689a94 --- /dev/null +++ b/drivers/acpi/numa/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_ACPI_NUMA) += srat.o +obj-$(CONFIG_ACPI_HMAT) += hmat.o diff --git a/drivers/acpi/hmat/hmat.c b/drivers/acpi/numa/hmat.c index 8b0de8a3c647..2c32cfb72370 100644 --- a/drivers/acpi/hmat/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -8,12 +8,18 @@ * the applicable attributes with the node's interfaces. */ +#define pr_fmt(fmt) "acpi/hmat: " fmt +#define dev_fmt(fmt) "acpi/hmat: " fmt + #include <linux/acpi.h> #include <linux/bitops.h> #include <linux/device.h> #include <linux/init.h> #include <linux/list.h> +#include <linux/mm.h> +#include <linux/platform_device.h> #include <linux/list_sort.h> +#include <linux/memregion.h> #include <linux/memory.h> #include <linux/mutex.h> #include <linux/node.h> @@ -49,6 +55,7 @@ struct memory_target { struct list_head node; unsigned int memory_pxm; unsigned int processor_pxm; + struct resource memregions; struct node_hmem_attrs hmem_attrs; struct list_head caches; struct node_cache_attrs cache_attrs; @@ -104,22 +111,36 @@ static __init void alloc_memory_initiator(unsigned int cpu_pxm) list_add_tail(&initiator->node, &initiators); } -static __init void alloc_memory_target(unsigned int mem_pxm) +static __init void alloc_memory_target(unsigned int mem_pxm, + resource_size_t start, resource_size_t len) { struct memory_target *target; target = find_mem_target(mem_pxm); - if (target) - return; - - target = kzalloc(sizeof(*target), GFP_KERNEL); - if (!target) - return; + if (!target) { + target = kzalloc(sizeof(*target), GFP_KERNEL); + if (!target) + return; + target->memory_pxm = mem_pxm; + target->processor_pxm = PXM_INVAL; + target->memregions = (struct resource) { + .name = "ACPI mem", + .start = 0, + .end = -1, + .flags = IORESOURCE_MEM, + }; + list_add_tail(&target->node, &targets); + INIT_LIST_HEAD(&target->caches); + } - target->memory_pxm = mem_pxm; - target->processor_pxm = PXM_INVAL; - list_add_tail(&target->node, &targets); - INIT_LIST_HEAD(&target->caches); + /* + * There are potentially multiple ranges per PXM, so record each + * in the per-target memregions resource tree. + */ + if (!__request_region(&target->memregions, start, len, "memory target", + IORESOURCE_MEM)) + pr_warn("failed to reserve %#llx - %#llx in pxm: %d\n", + start, start + len, mem_pxm); } static __init const char *hmat_data_type(u8 type) @@ -272,7 +293,7 @@ static __init int hmat_parse_locality(union acpi_subtable_headers *header, u8 type, mem_hier; if (hmat_loc->header.length < sizeof(*hmat_loc)) { - pr_notice("HMAT: Unexpected locality header length: %d\n", + pr_notice("HMAT: Unexpected locality header length: %u\n", hmat_loc->header.length); return -EINVAL; } @@ -284,12 +305,12 @@ static __init int hmat_parse_locality(union acpi_subtable_headers *header, total_size = sizeof(*hmat_loc) + sizeof(*entries) * ipds * tpds + sizeof(*inits) * ipds + sizeof(*targs) * tpds; if (hmat_loc->header.length < total_size) { - pr_notice("HMAT: Unexpected locality header length:%d, minimum required:%d\n", + pr_notice("HMAT: Unexpected locality header length:%u, minimum required:%u\n", hmat_loc->header.length, total_size); return -EINVAL; } - pr_info("HMAT: Locality: Flags:%02x Type:%s Initiator Domains:%d Target Domains:%d Base:%lld\n", + pr_info("HMAT: Locality: Flags:%02x Type:%s Initiator Domains:%u Target Domains:%u Base:%lld\n", hmat_loc->flags, hmat_data_type(type), ipds, tpds, hmat_loc->entry_base_unit); @@ -302,7 +323,7 @@ static __init int hmat_parse_locality(union acpi_subtable_headers *header, value = hmat_normalize(entries[init * tpds + targ], hmat_loc->entry_base_unit, type); - pr_info(" Initiator-Target[%d-%d]:%d%s\n", + pr_info(" Initiator-Target[%u-%u]:%u%s\n", inits[init], targs[targ], value, hmat_data_type_suffix(type)); @@ -329,13 +350,13 @@ static __init int hmat_parse_cache(union acpi_subtable_headers *header, u32 attrs; if (cache->header.length < sizeof(*cache)) { - pr_notice("HMAT: Unexpected cache header length: %d\n", + pr_notice("HMAT: Unexpected cache header length: %u\n", cache->header.length); return -EINVAL; } attrs = cache->cache_attributes; - pr_info("HMAT: Cache: Domain:%d Size:%llu Attrs:%08x SMBIOS Handles:%d\n", + pr_info("HMAT: Cache: Domain:%u Size:%llu Attrs:%08x SMBIOS Handles:%d\n", cache->memory_PD, cache->cache_size, attrs, cache->number_of_SMBIOShandles); @@ -390,17 +411,17 @@ static int __init hmat_parse_proximity_domain(union acpi_subtable_headers *heade struct memory_target *target = NULL; if (p->header.length != sizeof(*p)) { - pr_notice("HMAT: Unexpected address range header length: %d\n", + pr_notice("HMAT: Unexpected address range header length: %u\n", p->header.length); return -EINVAL; } if (hmat_revision == 1) - pr_info("HMAT: Memory (%#llx length %#llx) Flags:%04x Processor Domain:%d Memory Domain:%d\n", + pr_info("HMAT: Memory (%#llx length %#llx) Flags:%04x Processor Domain:%u Memory Domain:%u\n", p->reserved3, p->reserved4, p->flags, p->processor_PD, p->memory_PD); else - pr_info("HMAT: Memory Flags:%04x Processor Domain:%d Memory Domain:%d\n", + pr_info("HMAT: Memory Flags:%04x Processor Domain:%u Memory Domain:%u\n", p->flags, p->processor_PD, p->memory_PD); if (p->flags & ACPI_HMAT_MEMORY_PD_VALID && hmat_revision == 1) { @@ -417,7 +438,7 @@ static int __init hmat_parse_proximity_domain(union acpi_subtable_headers *heade pr_debug("HMAT: Invalid Processor Domain\n"); return -EINVAL; } - target->processor_pxm = p_node; + target->processor_pxm = p->processor_PD; } return 0; @@ -452,7 +473,7 @@ static __init int srat_parse_mem_affinity(union acpi_subtable_headers *header, return -EINVAL; if (!(ma->flags & ACPI_SRAT_MEM_ENABLED)) return 0; - alloc_memory_target(ma->proximity_domain); + alloc_memory_target(ma->proximity_domain, ma->base_address, ma->length); return 0; } @@ -613,11 +634,92 @@ static void hmat_register_target_perf(struct memory_target *target) node_set_perf_attrs(mem_nid, &target->hmem_attrs, 0); } +static void hmat_register_target_device(struct memory_target *target, + struct resource *r) +{ + /* define a clean / non-busy resource for the platform device */ + struct resource res = { + .start = r->start, + .end = r->end, + .flags = IORESOURCE_MEM, + }; + struct platform_device *pdev; + struct memregion_info info; + int rc, id; + + rc = region_intersects(res.start, resource_size(&res), IORESOURCE_MEM, + IORES_DESC_SOFT_RESERVED); + if (rc != REGION_INTERSECTS) + return; + + id = memregion_alloc(GFP_KERNEL); + if (id < 0) { + pr_err("memregion allocation failure for %pr\n", &res); + return; + } + + pdev = platform_device_alloc("hmem", id); + if (!pdev) { + pr_err("hmem device allocation failure for %pr\n", &res); + goto out_pdev; + } + + pdev->dev.numa_node = acpi_map_pxm_to_online_node(target->memory_pxm); + info = (struct memregion_info) { + .target_node = acpi_map_pxm_to_node(target->memory_pxm), + }; + rc = platform_device_add_data(pdev, &info, sizeof(info)); + if (rc < 0) { + pr_err("hmem memregion_info allocation failure for %pr\n", &res); + goto out_pdev; + } + + rc = platform_device_add_resources(pdev, &res, 1); + if (rc < 0) { + pr_err("hmem resource allocation failure for %pr\n", &res); + goto out_resource; + } + + rc = platform_device_add(pdev); + if (rc < 0) { + dev_err(&pdev->dev, "device add failed for %pr\n", &res); + goto out_resource; + } + + return; + +out_resource: + put_device(&pdev->dev); +out_pdev: + memregion_free(id); +} + +static void hmat_register_target_devices(struct memory_target *target) +{ + struct resource *res; + + /* + * Do not bother creating devices if no driver is available to + * consume them. + */ + if (!IS_ENABLED(CONFIG_DEV_DAX_HMEM)) + return; + + for (res = target->memregions.child; res; res = res->sibling) + hmat_register_target_device(target, res); +} + static void hmat_register_target(struct memory_target *target) { int nid = pxm_to_node(target->memory_pxm); /* + * Devices may belong to either an offline or online + * node, so unconditionally add them. + */ + hmat_register_target_devices(target); + + /* * Skip offline nodes. This can happen when memory * marked EFI_MEMORY_SP, "specific purpose", is applied * to all the memory in a promixity domain leading to @@ -677,11 +779,21 @@ static __init void hmat_free_structures(void) struct target_cache *tcache, *cnext; list_for_each_entry_safe(target, tnext, &targets, node) { + struct resource *res, *res_next; + list_for_each_entry_safe(tcache, cnext, &target->caches, node) { list_del(&tcache->node); kfree(tcache); } + list_del(&target->node); + res = target->memregions.child; + while (res) { + res_next = res->sibling; + __release_region(&target->memregions, res->start, + resource_size(res)); + res = res_next; + } kfree(target); } @@ -748,4 +860,4 @@ out_put: acpi_put_table(tbl); return 0; } -subsys_initcall(hmat_init); +device_initcall(hmat_init); diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa/srat.c index eadbf90e65d1..eadbf90e65d1 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa/srat.c diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig index f33c73e4af41..3b6c06f07326 100644 --- a/drivers/dax/Kconfig +++ b/drivers/dax/Kconfig @@ -32,19 +32,36 @@ config DEV_DAX_PMEM Say M if unsure +config DEV_DAX_HMEM + tristate "HMEM DAX: direct access to 'specific purpose' memory" + depends on EFI_SOFT_RESERVE + default DEV_DAX + help + EFI 2.8 platforms, and others, may advertise 'specific purpose' + memory. For example, a high bandwidth memory pool. The + indication from platform firmware is meant to reserve the + memory from typical usage by default. This driver creates + device-dax instances for these memory ranges, and that also + enables the possibility to assign them to the DEV_DAX_KMEM + driver to override the reservation and add them to kernel + "System RAM" pool. + + Say M if unsure. + config DEV_DAX_KMEM tristate "KMEM DAX: volatile-use of persistent memory" default DEV_DAX depends on DEV_DAX depends on MEMORY_HOTPLUG # for add_memory() and friends help - Support access to persistent memory as if it were RAM. This - allows easier use of persistent memory by unmodified - applications. + Support access to persistent, or other performance + differentiated memory as if it were System RAM. This allows + easier use of persistent memory by unmodified applications, or + adds core kernel memory services to heterogeneous memory types + (HMEM) marked "reserved" by platform firmware. To use this feature, a DAX device must be unbound from the - device_dax driver (PMEM DAX) and bound to this kmem driver - on each boot. + device_dax driver and bound to this kmem driver on each boot. Say N if unsure. diff --git a/drivers/dax/Makefile b/drivers/dax/Makefile index 81f7d54dadfb..80065b38b3c4 100644 --- a/drivers/dax/Makefile +++ b/drivers/dax/Makefile @@ -2,9 +2,11 @@ obj-$(CONFIG_DAX) += dax.o obj-$(CONFIG_DEV_DAX) += device_dax.o obj-$(CONFIG_DEV_DAX_KMEM) += kmem.o +obj-$(CONFIG_DEV_DAX_HMEM) += dax_hmem.o dax-y := super.o dax-y += bus.o device_dax-y := device.o +dax_hmem-y := hmem.o obj-y += pmem/ diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index 8fafbeab510a..eccdda1f7b71 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -227,7 +227,7 @@ static void dax_region_unregister(void *region) struct dax_region *alloc_dax_region(struct device *parent, int region_id, struct resource *res, int target_node, unsigned int align, - unsigned long pfn_flags) + unsigned long long pfn_flags) { struct dax_region *dax_region; diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h index 8619e3299943..9e4eba67e8b9 100644 --- a/drivers/dax/bus.h +++ b/drivers/dax/bus.h @@ -11,7 +11,7 @@ struct dax_region; void dax_region_put(struct dax_region *dax_region); struct dax_region *alloc_dax_region(struct device *parent, int region_id, struct resource *res, int target_node, unsigned int align, - unsigned long flags); + unsigned long long flags); enum dev_dax_subsys { DEV_DAX_BUS, diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h index 6ccca3b890d6..3107ce80e809 100644 --- a/drivers/dax/dax-private.h +++ b/drivers/dax/dax-private.h @@ -32,7 +32,7 @@ struct dax_region { struct device *dev; unsigned int align; struct resource res; - unsigned long pfn_flags; + unsigned long long pfn_flags; }; /** diff --git a/drivers/dax/hmem.c b/drivers/dax/hmem.c new file mode 100644 index 000000000000..fe7214daf62e --- /dev/null +++ b/drivers/dax/hmem.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/platform_device.h> +#include <linux/memregion.h> +#include <linux/module.h> +#include <linux/pfn_t.h> +#include "bus.h" + +static int dax_hmem_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct dev_pagemap pgmap = { }; + struct dax_region *dax_region; + struct memregion_info *mri; + struct dev_dax *dev_dax; + struct resource *res; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENOMEM; + + mri = dev->platform_data; + memcpy(&pgmap.res, res, sizeof(*res)); + + dax_region = alloc_dax_region(dev, pdev->id, res, mri->target_node, + PMD_SIZE, PFN_DEV|PFN_MAP); + if (!dax_region) + return -ENOMEM; + + dev_dax = devm_create_dev_dax(dax_region, 0, &pgmap); + if (IS_ERR(dev_dax)) + return PTR_ERR(dev_dax); + + /* child dev_dax instances now own the lifetime of the dax_region */ + dax_region_put(dax_region); + return 0; +} + +static int dax_hmem_remove(struct platform_device *pdev) +{ + /* devm handles teardown */ + return 0; +} + +static struct platform_driver dax_hmem_driver = { + .probe = dax_hmem_probe, + .remove = dax_hmem_remove, + .driver = { + .name = "hmem", + }, +}; + +module_platform_driver(dax_hmem_driver); + +MODULE_ALIAS("platform:hmem*"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Intel Corporation"); diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index b248870a9806..bcc378c19ebe 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig @@ -75,6 +75,27 @@ config EFI_MAX_FAKE_MEM Ranges can be set up to this value using comma-separated list. The default value is 8. +config EFI_SOFT_RESERVE + bool "Reserve EFI Specific Purpose Memory" + depends on EFI && EFI_STUB && ACPI_HMAT + default ACPI_HMAT + help + On systems that have mixed performance classes of memory EFI + may indicate specific purpose memory with an attribute (See + EFI_MEMORY_SP in UEFI 2.8). A memory range tagged with this + attribute may have unique performance characteristics compared + to the system's general purpose "System RAM" pool. On the + expectation that such memory has application specific usage, + and its base EFI memory type is "conventional" answer Y to + arrange for the kernel to reserve it as a "Soft Reserved" + resource, and set aside for direct-access (device-dax) by + default. The memory range can later be optionally assigned to + the page allocator by system administrator policy via the + device-dax kmem facility. Say N to have the kernel treat this + memory as "System RAM" by default. + + If unsure, say Y. + config EFI_PARAMS_FROM_FDT bool help diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile index 4ac2de4dfa72..554d795270d9 100644 --- a/drivers/firmware/efi/Makefile +++ b/drivers/firmware/efi/Makefile @@ -20,13 +20,16 @@ obj-$(CONFIG_UEFI_CPER) += cper.o obj-$(CONFIG_EFI_RUNTIME_MAP) += runtime-map.o obj-$(CONFIG_EFI_RUNTIME_WRAPPERS) += runtime-wrappers.o obj-$(CONFIG_EFI_STUB) += libstub/ -obj-$(CONFIG_EFI_FAKE_MEMMAP) += fake_mem.o +obj-$(CONFIG_EFI_FAKE_MEMMAP) += fake_map.o obj-$(CONFIG_EFI_BOOTLOADER_CONTROL) += efibc.o obj-$(CONFIG_EFI_TEST) += test/ obj-$(CONFIG_EFI_DEV_PATH_PARSER) += dev-path-parser.o obj-$(CONFIG_APPLE_PROPERTIES) += apple-properties.o obj-$(CONFIG_EFI_RCI2_TABLE) += rci2-table.o +fake_map-y += fake_mem.o +fake_map-$(CONFIG_X86) += x86_fake_mem.o + arm-obj-$(CONFIG_EFI) := arm-init.o arm-runtime.o obj-$(CONFIG_ARM) += $(arm-obj-y) obj-$(CONFIG_ARM64) += $(arm-obj-y) diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c index 311cd349a862..904fa09e6a6b 100644 --- a/drivers/firmware/efi/arm-init.c +++ b/drivers/firmware/efi/arm-init.c @@ -164,6 +164,15 @@ static __init int is_usable_memory(efi_memory_desc_t *md) case EFI_CONVENTIONAL_MEMORY: case EFI_PERSISTENT_MEMORY: /* + * Special purpose memory is 'soft reserved', which means it + * is set aside initially, but can be hotplugged back in or + * be assigned to the dax driver after boot. + */ + if (efi_soft_reserve_enabled() && + (md->attribute & EFI_MEMORY_SP)) + return false; + + /* * According to the spec, these regions are no longer reserved * after calling ExitBootServices(). However, we can only use * them as System RAM if they can be mapped writeback cacheable. diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index e2ac5fa5531b..899b803842bb 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -121,6 +121,30 @@ static int __init arm_enable_runtime_services(void) return 0; } + if (efi_soft_reserve_enabled()) { + efi_memory_desc_t *md; + + for_each_efi_memory_desc(md) { + int md_size = md->num_pages << EFI_PAGE_SHIFT; + struct resource *res; + + if (!(md->attribute & EFI_MEMORY_SP)) + continue; + + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (WARN_ON(!res)) + break; + + res->start = md->phys_addr; + res->end = md->phys_addr + md_size - 1; + res->name = "Soft Reserved"; + res->flags = IORESOURCE_MEM; + res->desc = IORES_DESC_SOFT_RESERVED; + + insert_resource(&iomem_resource, res); + } + } + if (efi_runtime_disabled()) { pr_info("EFI runtime services will be disabled.\n"); return 0; diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 8a492ce16b3b..d101f072c8f8 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -81,6 +81,11 @@ bool efi_runtime_disabled(void) return disable_runtime; } +bool __pure __efi_soft_reserve_enabled(void) +{ + return !efi_enabled(EFI_MEM_NO_SOFT_RESERVE); +} + static int __init parse_efi_cmdline(char *str) { if (!str) { @@ -94,6 +99,9 @@ static int __init parse_efi_cmdline(char *str) if (parse_option_str(str, "noruntime")) disable_runtime = true; + if (parse_option_str(str, "nosoftreserve")) + set_bit(EFI_MEM_NO_SOFT_RESERVE, &efi.flags); + return 0; } early_param("efi", parse_efi_cmdline); @@ -842,15 +850,16 @@ char * __init efi_md_typeattr_format(char *buf, size_t size, if (attr & ~(EFI_MEMORY_UC | EFI_MEMORY_WC | EFI_MEMORY_WT | EFI_MEMORY_WB | EFI_MEMORY_UCE | EFI_MEMORY_RO | EFI_MEMORY_WP | EFI_MEMORY_RP | EFI_MEMORY_XP | - EFI_MEMORY_NV | + EFI_MEMORY_NV | EFI_MEMORY_SP | EFI_MEMORY_RUNTIME | EFI_MEMORY_MORE_RELIABLE)) snprintf(pos, size, "|attr=0x%016llx]", (unsigned long long)attr); else snprintf(pos, size, - "|%3s|%2s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]", + "|%3s|%2s|%2s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]", attr & EFI_MEMORY_RUNTIME ? "RUN" : "", attr & EFI_MEMORY_MORE_RELIABLE ? "MR" : "", + attr & EFI_MEMORY_SP ? "SP" : "", attr & EFI_MEMORY_NV ? "NV" : "", attr & EFI_MEMORY_XP ? "XP" : "", attr & EFI_MEMORY_RP ? "RP" : "", diff --git a/drivers/firmware/efi/esrt.c b/drivers/firmware/efi/esrt.c index d6dd5f503fa2..2762e0662bf4 100644 --- a/drivers/firmware/efi/esrt.c +++ b/drivers/firmware/efi/esrt.c @@ -246,6 +246,9 @@ void __init efi_esrt_init(void) int rc; phys_addr_t end; + if (!efi_enabled(EFI_MEMMAP)) + return; + pr_debug("esrt-init: loading.\n"); if (!esrt_table_exists()) return; diff --git a/drivers/firmware/efi/fake_mem.c b/drivers/firmware/efi/fake_mem.c index 9501edc0fcfb..bb9fc70d0cfa 100644 --- a/drivers/firmware/efi/fake_mem.c +++ b/drivers/firmware/efi/fake_mem.c @@ -17,12 +17,10 @@ #include <linux/memblock.h> #include <linux/types.h> #include <linux/sort.h> -#include <asm/efi.h> +#include "fake_mem.h" -#define EFI_MAX_FAKEMEM CONFIG_EFI_MAX_FAKE_MEM - -static struct efi_mem_range fake_mems[EFI_MAX_FAKEMEM]; -static int nr_fake_mem; +struct efi_mem_range efi_fake_mems[EFI_MAX_FAKEMEM]; +int nr_fake_mem; static int __init cmp_fake_mem(const void *x1, const void *x2) { @@ -44,13 +42,13 @@ void __init efi_fake_memmap(void) void *new_memmap; int i; - if (!nr_fake_mem) + if (!efi_enabled(EFI_MEMMAP) || !nr_fake_mem) return; /* count up the number of EFI memory descriptor */ for (i = 0; i < nr_fake_mem; i++) { for_each_efi_memory_desc(md) { - struct range *r = &fake_mems[i].range; + struct range *r = &efi_fake_mems[i].range; new_nr_map += efi_memmap_split_count(md, r); } @@ -70,7 +68,7 @@ void __init efi_fake_memmap(void) } for (i = 0; i < nr_fake_mem; i++) - efi_memmap_insert(&efi.memmap, new_memmap, &fake_mems[i]); + efi_memmap_insert(&efi.memmap, new_memmap, &efi_fake_mems[i]); /* swap into new EFI memmap */ early_memunmap(new_memmap, efi.memmap.desc_size * new_nr_map); @@ -104,22 +102,22 @@ static int __init setup_fake_mem(char *p) if (nr_fake_mem >= EFI_MAX_FAKEMEM) break; - fake_mems[nr_fake_mem].range.start = start; - fake_mems[nr_fake_mem].range.end = start + mem_size - 1; - fake_mems[nr_fake_mem].attribute = attribute; + efi_fake_mems[nr_fake_mem].range.start = start; + efi_fake_mems[nr_fake_mem].range.end = start + mem_size - 1; + efi_fake_mems[nr_fake_mem].attribute = attribute; nr_fake_mem++; if (*p == ',') p++; } - sort(fake_mems, nr_fake_mem, sizeof(struct efi_mem_range), + sort(efi_fake_mems, nr_fake_mem, sizeof(struct efi_mem_range), cmp_fake_mem, NULL); for (i = 0; i < nr_fake_mem; i++) pr_info("efi_fake_mem: add attr=0x%016llx to [mem 0x%016llx-0x%016llx]", - fake_mems[i].attribute, fake_mems[i].range.start, - fake_mems[i].range.end); + efi_fake_mems[i].attribute, efi_fake_mems[i].range.start, + efi_fake_mems[i].range.end); return *p == '\0' ? 0 : -EINVAL; } diff --git a/drivers/firmware/efi/fake_mem.h b/drivers/firmware/efi/fake_mem.h new file mode 100644 index 000000000000..d52791af4b18 --- /dev/null +++ b/drivers/firmware/efi/fake_mem.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __EFI_FAKE_MEM_H__ +#define __EFI_FAKE_MEM_H__ +#include <asm/efi.h> + +#define EFI_MAX_FAKEMEM CONFIG_EFI_MAX_FAKE_MEM + +extern struct efi_mem_range efi_fake_mems[EFI_MAX_FAKEMEM]; +extern int nr_fake_mem; +#endif /* __EFI_FAKE_MEM_H__ */ diff --git a/drivers/firmware/efi/libstub/arm32-stub.c b/drivers/firmware/efi/libstub/arm32-stub.c index 41213bf5fcf5..4566640de650 100644 --- a/drivers/firmware/efi/libstub/arm32-stub.c +++ b/drivers/firmware/efi/libstub/arm32-stub.c @@ -146,6 +146,11 @@ static efi_status_t reserve_kernel_base(efi_system_table_t *sys_table_arg, continue; case EFI_CONVENTIONAL_MEMORY: + /* Skip soft reserved conventional memory */ + if (efi_soft_reserve_enabled() && + (desc->attribute & EFI_MEMORY_SP)) + continue; + /* * Reserve the intersection between this entry and the * region. diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index 35dbc2791c97..e02579907f2e 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -32,6 +32,7 @@ static unsigned long __chunk_size = EFI_READ_CHUNK_SIZE; static int __section(.data) __nokaslr; static int __section(.data) __quiet; static int __section(.data) __novamap; +static bool __section(.data) efi_nosoftreserve; int __pure nokaslr(void) { @@ -45,6 +46,10 @@ int __pure novamap(void) { return __novamap; } +bool __pure __efi_soft_reserve_enabled(void) +{ + return !efi_nosoftreserve; +} #define EFI_MMAP_NR_SLACK_SLOTS 8 @@ -211,6 +216,10 @@ again: if (desc->type != EFI_CONVENTIONAL_MEMORY) continue; + if (efi_soft_reserve_enabled() && + (desc->attribute & EFI_MEMORY_SP)) + continue; + if (desc->num_pages < nr_pages) continue; @@ -305,6 +314,10 @@ efi_status_t efi_low_alloc_above(efi_system_table_t *sys_table_arg, if (desc->type != EFI_CONVENTIONAL_MEMORY) continue; + if (efi_soft_reserve_enabled() && + (desc->attribute & EFI_MEMORY_SP)) + continue; + if (desc->num_pages < nr_pages) continue; @@ -484,6 +497,12 @@ efi_status_t efi_parse_options(char const *cmdline) __novamap = 1; } + if (IS_ENABLED(CONFIG_EFI_SOFT_RESERVE) && + !strncmp(str, "nosoftreserve", 7)) { + str += strlen("nosoftreserve"); + efi_nosoftreserve = 1; + } + /* Group words together, delimited by "," */ while (*str && *str != ' ' && *str != ',') str++; diff --git a/drivers/firmware/efi/libstub/random.c b/drivers/firmware/efi/libstub/random.c index b4b1d1dcb5fd..6c188695e730 100644 --- a/drivers/firmware/efi/libstub/random.c +++ b/drivers/firmware/efi/libstub/random.c @@ -46,6 +46,10 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, if (md->type != EFI_CONVENTIONAL_MEMORY) return 0; + if (efi_soft_reserve_enabled() && + (md->attribute & EFI_MEMORY_SP)) + return 0; + region_end = min((u64)ULONG_MAX, md->phys_addr + md->num_pages*EFI_PAGE_SIZE - 1); first_slot = round_up(md->phys_addr, align); diff --git a/drivers/firmware/efi/x86_fake_mem.c b/drivers/firmware/efi/x86_fake_mem.c new file mode 100644 index 000000000000..e5d6d5a1b240 --- /dev/null +++ b/drivers/firmware/efi/x86_fake_mem.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2019 Intel Corporation. All rights reserved. */ +#include <linux/efi.h> +#include <asm/e820/api.h> +#include "fake_mem.h" + +void __init efi_fake_memmap_early(void) +{ + int i; + + /* + * The late efi_fake_mem() call can handle all requests if + * EFI_MEMORY_SP support is disabled. + */ + if (!efi_soft_reserve_enabled()) + return; + + if (!efi_enabled(EFI_MEMMAP) || !nr_fake_mem) + return; + + /* + * Given that efi_fake_memmap() needs to perform memblock + * allocations it needs to run after e820__memblock_setup(). + * However, if efi_fake_mem specifies EFI_MEMORY_SP for a given + * address range that potentially needs to mark the memory as + * reserved prior to e820__memblock_setup(). Update e820 + * directly if EFI_MEMORY_SP is specified for an + * EFI_CONVENTIONAL_MEMORY descriptor. + */ + for (i = 0; i < nr_fake_mem; i++) { + struct efi_mem_range *mem = &efi_fake_mems[i]; + efi_memory_desc_t *md; + u64 m_start, m_end; + + if ((mem->attribute & EFI_MEMORY_SP) == 0) + continue; + + m_start = mem->range.start; + m_end = mem->range.end; + for_each_efi_memory_desc(md) { + u64 start, end; + + if (md->type != EFI_CONVENTIONAL_MEMORY) + continue; + + start = md->phys_addr; + end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1; + + if (m_start <= end && m_end >= start) + /* fake range overlaps descriptor */; + else + continue; + + /* + * Trim the boundary of the e820 update to the + * descriptor in case the fake range overlaps + * !EFI_CONVENTIONAL_MEMORY + */ + start = max(start, m_start); + end = min(end, m_end); + + if (end <= start) + continue; + e820__range_update(start, end - start + 1, E820_TYPE_RAM, + E820_TYPE_SOFT_RESERVED); + e820__update_table(e820_table); + } + } +} diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig index 36af7af6b7cf..b7d1eb38b27d 100644 --- a/drivers/nvdimm/Kconfig +++ b/drivers/nvdimm/Kconfig @@ -4,6 +4,7 @@ menuconfig LIBNVDIMM depends on PHYS_ADDR_T_64BIT depends on HAS_IOMEM depends on BLK_DEV + select MEMREGION help Generic support for non-volatile memory devices including ACPI-6-NFIT defined resources. On platforms that define an diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 9204f1e9fd14..e592c4964674 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -455,7 +455,6 @@ static __exit void libnvdimm_exit(void) nd_region_exit(); nvdimm_exit(); nvdimm_bus_exit(); - nd_region_devs_exit(); nvdimm_devs_exit(); } diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 25fa121104d0..aa059439fca0 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -114,7 +114,6 @@ struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev); int __init nvdimm_bus_init(void); void nvdimm_bus_exit(void); void nvdimm_devs_exit(void); -void nd_region_devs_exit(void); struct nd_region; void nd_region_advance_seeds(struct nd_region *nd_region, struct device *dev); void nd_region_create_ns_seed(struct nd_region *nd_region); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index ef423ba1a711..fbf34cf688f4 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -3,6 +3,7 @@ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. */ #include <linux/scatterlist.h> +#include <linux/memregion.h> #include <linux/highmem.h> #include <linux/sched.h> #include <linux/slab.h> @@ -19,7 +20,6 @@ */ #include <linux/io-64-nonatomic-hi-lo.h> -static DEFINE_IDA(region_ida); static DEFINE_PER_CPU(int, flush_idx); static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm, @@ -133,7 +133,7 @@ static void nd_region_release(struct device *dev) put_device(&nvdimm->dev); } free_percpu(nd_region->lane); - ida_simple_remove(®ion_ida, nd_region->id); + memregion_free(nd_region->id); if (is_nd_blk(dev)) kfree(to_nd_blk_region(dev)); else @@ -985,7 +985,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, if (!region_buf) return NULL; - nd_region->id = ida_simple_get(®ion_ida, 0, 0, GFP_KERNEL); + nd_region->id = memregion_alloc(GFP_KERNEL); if (nd_region->id < 0) goto err_id; @@ -1044,7 +1044,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, return nd_region; err_percpu: - ida_simple_remove(®ion_ida, nd_region->id); + memregion_free(nd_region->id); err_id: kfree(region_buf); return NULL; @@ -1216,8 +1216,3 @@ int nd_region_conflict(struct nd_region *nd_region, resource_size_t start, return device_for_each_child(&nvdimm_bus->dev, &ctx, region_conflict); } - -void __exit nd_region_devs_exit(void) -{ - ida_destroy(®ion_ida); -} |