From a814dcc269830c9dbb8a83731cfc6fc5dd787f8d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 1 Oct 2019 17:27:21 +0300 Subject: ACPI / utils: Move acpi_dev_get_first_match_dev() under CONFIG_ACPI We have a stub defined for the acpi_dev_get_first_match_dev() in acpi.h for the case when CONFIG_ACPI=n. Moreover, acpi_dev_put(), counterpart function, is already placed under CONFIG_ACPI. Thus, move acpi_dev_get_first_match_dev() under CONFIG_ACPI as well. Fixes: 817b4d64da03 ("ACPI / utils: Introduce acpi_dev_get_first_match_dev() helper") Reported-by: kbuild test robot Signed-off-by: Andy Shevchenko Reviewed-by: Mika Westerberg Cc: 5.2+ # 5.2+ Signed-off-by: Rafael J. Wysocki --- include/acpi/acpi_bus.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 175f7b40c585..3f6fddeb7519 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -78,9 +78,6 @@ acpi_evaluate_dsm_typed(acpi_handle handle, const guid_t *guid, u64 rev, bool acpi_dev_found(const char *hid); bool acpi_dev_present(const char *hid, const char *uid, s64 hrv); -struct acpi_device * -acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv); - #ifdef CONFIG_ACPI #include @@ -683,6 +680,9 @@ static inline bool acpi_device_can_poweroff(struct acpi_device *adev) adev->power.states[ACPI_STATE_D3_HOT].flags.explicit_set); } +struct acpi_device * +acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv); + static inline void acpi_dev_put(struct acpi_device *adev) { put_device(&adev->dev); -- cgit v1.2.3 From 35009c807488ccd5a01cbf102033695e52794b68 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 1 Oct 2019 17:27:22 +0300 Subject: ACPI / utils: Introduce acpi_dev_hid_uid_match() helper There are users outside of ACPI realm which reimplementing the comparator function to check if the given device matches to given HID and UID. For better utilization, introduce a helper for everyone to use. Signed-off-by: Andy Shevchenko Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- drivers/acpi/utils.c | 25 +++++++++++++++++++++++++ include/acpi/acpi_bus.h | 2 ++ include/linux/acpi.h | 8 ++++++++ 3 files changed, 35 insertions(+) (limited to 'include') diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c index dbd1c4cfd7d1..804ac0df58ec 100644 --- a/drivers/acpi/utils.c +++ b/drivers/acpi/utils.c @@ -701,6 +701,31 @@ bool acpi_check_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 funcs) } EXPORT_SYMBOL(acpi_check_dsm); +/** + * acpi_dev_hid_uid_match - Match device by supplied HID and UID + * @adev: ACPI device to match. + * @hid2: Hardware ID of the device. + * @uid2: Unique ID of the device, pass NULL to not check _UID. + * + * Matches HID and UID in @adev with given @hid2 and @uid2. + * Returns true if matches. + */ +bool acpi_dev_hid_uid_match(struct acpi_device *adev, + const char *hid2, const char *uid2) +{ + const char *hid1 = acpi_device_hid(adev); + const char *uid1 = acpi_device_uid(adev); + + if (strcmp(hid1, hid2)) + return false; + + if (!uid2) + return true; + + return uid1 && !strcmp(uid1, uid2); +} +EXPORT_SYMBOL(acpi_dev_hid_uid_match); + /** * acpi_dev_found - Detect presence of a given ACPI device in the namespace. * @hid: Hardware ID of the device. diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 3f6fddeb7519..0c23fd0548d1 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -680,6 +680,8 @@ static inline bool acpi_device_can_poweroff(struct acpi_device *adev) adev->power.states[ACPI_STATE_D3_HOT].flags.explicit_set); } +bool acpi_dev_hid_uid_match(struct acpi_device *adev, const char *hid2, const char *uid2); + struct acpi_device * acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 8b4e516bac00..0f37a7d5fa77 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -678,6 +678,14 @@ static inline bool acpi_dev_present(const char *hid, const char *uid, s64 hrv) return false; } +struct acpi_device; + +static inline bool +acpi_dev_hid_uid_match(struct acpi_device *adev, const char *hid2, const char *uid2) +{ + return false; +} + static inline struct acpi_device * acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv) { -- cgit v1.2.3 From e346d0cf2c0a2dc9e63d5b90824bbe5ac0cc43e2 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 26 Oct 2019 22:24:36 +0200 Subject: ACPI: button: Remove unused acpi_lid_notifier_[un]register() functions There are no users of the acpi_lid_notifier_[un]register functions, so lets remove them. Reviewed-by: Andy Shevchenko Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/button.c | 27 +-------------------------- include/acpi/button.h | 12 ------------ 2 files changed, 1 insertion(+), 38 deletions(-) (limited to 'include') diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index a090e9542d82..d27b01c0323d 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -147,7 +147,6 @@ struct acpi_button { bool suspended; }; -static BLOCKING_NOTIFIER_HEAD(acpi_lid_notifier); static struct acpi_device *lid_device; static long lid_init_state = -1; @@ -177,7 +176,6 @@ static int acpi_lid_evaluate_state(struct acpi_device *device) static int acpi_lid_notify_state(struct acpi_device *device, int state) { struct acpi_button *button = acpi_driver_data(device); - int ret; ktime_t next_report; bool do_update; @@ -254,18 +252,7 @@ static int acpi_lid_notify_state(struct acpi_device *device, int state) button->last_time = ktime_get(); } - ret = blocking_notifier_call_chain(&acpi_lid_notifier, state, device); - if (ret == NOTIFY_DONE) - ret = blocking_notifier_call_chain(&acpi_lid_notifier, state, - device); - if (ret == NOTIFY_DONE || ret == NOTIFY_OK) { - /* - * It is also regarded as success if the notifier_chain - * returns NOTIFY_OK or NOTIFY_DONE. - */ - ret = 0; - } - return ret; + return 0; } static int __maybe_unused acpi_button_state_seq_show(struct seq_file *seq, @@ -362,18 +349,6 @@ static int acpi_button_remove_fs(struct acpi_device *device) /* -------------------------------------------------------------------------- Driver Interface -------------------------------------------------------------------------- */ -int acpi_lid_notifier_register(struct notifier_block *nb) -{ - return blocking_notifier_chain_register(&acpi_lid_notifier, nb); -} -EXPORT_SYMBOL(acpi_lid_notifier_register); - -int acpi_lid_notifier_unregister(struct notifier_block *nb) -{ - return blocking_notifier_chain_unregister(&acpi_lid_notifier, nb); -} -EXPORT_SYMBOL(acpi_lid_notifier_unregister); - int acpi_lid_open(void) { if (!lid_device) diff --git a/include/acpi/button.h b/include/acpi/button.h index 3a2b8535dec6..340da7784cc8 100644 --- a/include/acpi/button.h +++ b/include/acpi/button.h @@ -2,21 +2,9 @@ #ifndef ACPI_BUTTON_H #define ACPI_BUTTON_H -#include - #if IS_ENABLED(CONFIG_ACPI_BUTTON) -extern int acpi_lid_notifier_register(struct notifier_block *nb); -extern int acpi_lid_notifier_unregister(struct notifier_block *nb); extern int acpi_lid_open(void); #else -static inline int acpi_lid_notifier_register(struct notifier_block *nb) -{ - return 0; -} -static inline int acpi_lid_notifier_unregister(struct notifier_block *nb) -{ - return 0; -} static inline int acpi_lid_open(void) { return 1; -- cgit v1.2.3 From 42d939fadbfa96d2983c804bb1980699a6c1f221 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Fri, 25 Oct 2019 14:36:52 -0700 Subject: ACPICA: Add new external interface, acpi_unload_table() ACPICA commit c69369cd9cf0134e1aac516e97d612947daa8dc2 Unload a table via the table_index. Link: https://github.com/acpica/acpica/commit/c69369cd Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/tbxfload.c | 32 ++++++++++++++++++++++++++++++++ include/acpi/acpixf.h | 3 +++ 2 files changed, 35 insertions(+) (limited to 'include') diff --git a/drivers/acpi/acpica/tbxfload.c b/drivers/acpi/acpica/tbxfload.c index 86f1693f6d29..ce86e7945e90 100644 --- a/drivers/acpi/acpica/tbxfload.c +++ b/drivers/acpi/acpica/tbxfload.c @@ -390,3 +390,35 @@ acpi_status acpi_unload_parent_table(acpi_handle object) } ACPI_EXPORT_SYMBOL(acpi_unload_parent_table) +/******************************************************************************* + * + * FUNCTION: acpi_unload_table + * + * PARAMETERS: table_index - Index as returned by acpi_load_table + * + * RETURN: Status + * + * DESCRIPTION: Via the table_index representing an SSDT or OEMx table, unloads + * the table and deletes all namespace objects associated with + * that table. Unloading of the DSDT is not allowed. + * Note: Mainly intended to support hotplug removal of SSDTs. + * + ******************************************************************************/ +acpi_status acpi_unload_table(u32 table_index) +{ + acpi_status status; + + ACPI_FUNCTION_TRACE(acpi_unload_table); + + if (table_index == 1) { + + /* table_index==1 means DSDT is the owner. DSDT cannot be unloaded */ + + return_ACPI_STATUS(AE_TYPE); + } + + status = acpi_tb_unload_table(table_index); + return_ACPI_STATUS(status); +} + +ACPI_EXPORT_SYMBOL(acpi_unload_table) diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index e5e041413581..109b2f14b6c6 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -460,6 +460,9 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status ACPI_INIT_FUNCTION ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_load_table(struct acpi_table_header *table)) +ACPI_EXTERNAL_RETURN_STATUS(acpi_status + acpi_unload_table(u32 table_index)) + ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_unload_parent_table(acpi_handle object)) -- cgit v1.2.3 From 1770093c5bed404ac69b04dc8b9e62a2c4db944a Mon Sep 17 00:00:00 2001 From: Nikolaus Voss Date: Fri, 25 Oct 2019 14:36:53 -0700 Subject: ACPICA: make acpi_load_table() return table index ACPICA commit d1716a829d19be23277d9157c575a03b9abb7457 For unloading an ACPI table, it is necessary to provide the index of the table. The method intended for dynamically loading or hotplug addition of tables, acpi_load_table(), should provide this information via an optional pointer to the loaded table index. This patch fixes the table unload function of acpi_configfs. Reported-by: Andy Shevchenko Fixes: d06c47e3dd07f ("ACPI: configfs: Resolve objects on host-directed table loads") Link: https://github.com/acpica/acpica/commit/d1716a82 Signed-off-by: Nikolaus Voss Signed-off-by: Erik Schmauss Signed-off-by: Bob Moore Tested-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_configfs.c | 4 ++-- drivers/acpi/acpica/dbfileio.c | 2 +- drivers/acpi/acpica/tbxfload.c | 8 +++++++- drivers/firmware/efi/efi.c | 2 +- include/acpi/acpixf.h | 3 ++- 5 files changed, 13 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/acpi/acpi_configfs.c b/drivers/acpi/acpi_configfs.c index 57d9d574d4dd..ece8c1a921cc 100644 --- a/drivers/acpi/acpi_configfs.c +++ b/drivers/acpi/acpi_configfs.c @@ -53,7 +53,7 @@ static ssize_t acpi_table_aml_write(struct config_item *cfg, if (!table->header) return -ENOMEM; - ret = acpi_load_table(table->header); + ret = acpi_load_table(table->header, &table->index); if (ret) { kfree(table->header); table->header = NULL; @@ -223,7 +223,7 @@ static void acpi_table_drop_item(struct config_group *group, struct acpi_table *table = container_of(cfg, struct acpi_table, cfg); ACPI_INFO(("Host-directed Dynamic ACPI Table Unload")); - acpi_tb_unload_table(table->index); + acpi_unload_table(table->index); } static struct configfs_group_operations acpi_table_group_ops = { diff --git a/drivers/acpi/acpica/dbfileio.c b/drivers/acpi/acpica/dbfileio.c index c6e25734dc5c..e1b6e54a96ac 100644 --- a/drivers/acpi/acpica/dbfileio.c +++ b/drivers/acpi/acpica/dbfileio.c @@ -93,7 +93,7 @@ acpi_status acpi_db_load_tables(struct acpi_new_table_desc *list_head) while (table_list_head) { table = table_list_head->table; - status = acpi_load_table(table); + status = acpi_load_table(table, NULL); if (ACPI_FAILURE(status)) { if (status == AE_ALREADY_EXISTS) { acpi_os_printf diff --git a/drivers/acpi/acpica/tbxfload.c b/drivers/acpi/acpica/tbxfload.c index ce86e7945e90..0782acf85722 100644 --- a/drivers/acpi/acpica/tbxfload.c +++ b/drivers/acpi/acpica/tbxfload.c @@ -268,6 +268,8 @@ ACPI_EXPORT_SYMBOL_INIT(acpi_install_table) * * PARAMETERS: table - Pointer to a buffer containing the ACPI * table to be loaded. + * table_idx - Pointer to a u32 for storing the table + * index, might be NULL * * RETURN: Status * @@ -278,7 +280,7 @@ ACPI_EXPORT_SYMBOL_INIT(acpi_install_table) * to ensure that the table is not deleted or unmapped. * ******************************************************************************/ -acpi_status acpi_load_table(struct acpi_table_header *table) +acpi_status acpi_load_table(struct acpi_table_header *table, u32 *table_idx) { acpi_status status; u32 table_index; @@ -297,6 +299,10 @@ acpi_status acpi_load_table(struct acpi_table_header *table) status = acpi_tb_install_and_load_table(ACPI_PTR_TO_PHYSADDR(table), ACPI_TABLE_ORIGIN_EXTERNAL_VIRTUAL, FALSE, &table_index); + if (table_idx) { + *table_idx = table_index; + } + if (ACPI_SUCCESS(status)) { /* Complete the initialization/resolution of new objects */ diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 69f00f7453a3..0d65cb21519d 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -296,7 +296,7 @@ static __init int efivar_ssdt_load(void) goto free_data; } - ret = acpi_load_table(data); + ret = acpi_load_table(data, NULL); if (ret) { pr_err("failed to load table: %d\n", ret); goto free_data; diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 109b2f14b6c6..867170049b07 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -458,7 +458,8 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status ACPI_INIT_FUNCTION u8 physical)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status - acpi_load_table(struct acpi_table_header *table)) + acpi_load_table(struct acpi_table_header *table, + u32 *table_idx)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_unload_table(u32 table_index)) -- cgit v1.2.3 From c7ccf10bb92e68d8eb09f1b9635df7fbce0190d9 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Fri, 25 Oct 2019 14:37:00 -0700 Subject: ACPICA: Update version to 20191018 ACPICA commit 3d70fd4894824ed1e685f2d059ca22ccd9ac6163 Version 20191018. Link: https://github.com/acpica/acpica/commit/3d70fd48 Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. Wysocki --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 867170049b07..18790b9e16b5 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -12,7 +12,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20190816 +#define ACPI_CA_VERSION 0x20191018 #include #include -- cgit v1.2.3 From fe3e5e65c06edb1c56e64e567f053e243142001f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 6 Nov 2019 17:43:00 -0800 Subject: efi: Enumerate EFI_MEMORY_SP UEFI 2.8 defines an EFI_MEMORY_SP attribute bit to augment the interpretation of the EFI Memory Types as "reserved for a specific purpose". The intent of this bit is to allow the OS to identify precious or scarce memory resources and optionally manage it separately from EfiConventionalMemory. As defined older OSes that do not know about this attribute are permitted to ignore it and the memory will be handled according to the OS default policy for the given memory type. In other words, this "specific purpose" hint is deliberately weaker than EfiReservedMemoryType in that the system continues to operate if the OS takes no action on the attribute. The risk of taking no action is potentially unwanted / unmovable kernel allocations from the designated resource that prevent the full realization of the "specific purpose". For example, consider a system with a high-bandwidth memory pool. Older kernels are permitted to boot and consume that memory as conventional "System-RAM" newer kernels may arrange for that memory to be set aside (soft reserved) by the system administrator for a dedicated high-bandwidth memory aware application to consume. Specifically, this mechanism allows for the elimination of scenarios where platform firmware tries to game OS policy by lying about ACPI SLIT values, i.e. claiming that a precious memory resource has a high distance to trigger the OS to avoid it by default. This reservation hint allows platform-firmware to instead tell the truth about performance characteristics by indicate to OS memory management to put immovable allocations elsewhere. Implement simple detection of the bit for EFI memory table dumps and save the kernel policy for a follow-on change. Reviewed-by: Ard Biesheuvel Reviewed-by: Dave Hansen Signed-off-by: Dan Williams Acked-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki --- drivers/firmware/efi/efi.c | 5 +++-- include/linux/efi.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index e98bbf8e56d9..f8f8e273d809 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -842,15 +842,16 @@ char * __init efi_md_typeattr_format(char *buf, size_t size, if (attr & ~(EFI_MEMORY_UC | EFI_MEMORY_WC | EFI_MEMORY_WT | EFI_MEMORY_WB | EFI_MEMORY_UCE | EFI_MEMORY_RO | EFI_MEMORY_WP | EFI_MEMORY_RP | EFI_MEMORY_XP | - EFI_MEMORY_NV | + EFI_MEMORY_NV | EFI_MEMORY_SP | EFI_MEMORY_RUNTIME | EFI_MEMORY_MORE_RELIABLE)) snprintf(pos, size, "|attr=0x%016llx]", (unsigned long long)attr); else snprintf(pos, size, - "|%3s|%2s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]", + "|%3s|%2s|%2s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]", attr & EFI_MEMORY_RUNTIME ? "RUN" : "", attr & EFI_MEMORY_MORE_RELIABLE ? "MR" : "", + attr & EFI_MEMORY_SP ? "SP" : "", attr & EFI_MEMORY_NV ? "NV" : "", attr & EFI_MEMORY_XP ? "XP" : "", attr & EFI_MEMORY_RP ? "RP" : "", diff --git a/include/linux/efi.h b/include/linux/efi.h index d87acf62958e..78c75992b313 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -112,6 +112,7 @@ typedef struct { #define EFI_MEMORY_MORE_RELIABLE \ ((u64)0x0000000000010000ULL) /* higher reliability */ #define EFI_MEMORY_RO ((u64)0x0000000000020000ULL) /* read-only */ +#define EFI_MEMORY_SP ((u64)0x0000000000040000ULL) /* soft reserved */ #define EFI_MEMORY_RUNTIME ((u64)0x8000000000000000ULL) /* range requires runtime mapping */ #define EFI_MEMORY_DESCRIPTOR_VERSION 1 -- cgit v1.2.3 From 6950e31b35fdf4588cbbdec1813091bb02cf8871 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 6 Nov 2019 17:43:05 -0800 Subject: x86/efi: Push EFI_MEMMAP check into leaf routines In preparation for adding another EFI_MEMMAP dependent call that needs to occur before e820__memblock_setup() fixup the existing efi calls to check for EFI_MEMMAP internally. This ends up being cleaner than the alternative of checking EFI_MEMMAP multiple times in setup_arch(). Reviewed-by: Dave Hansen Reviewed-by: Ard Biesheuvel Signed-off-by: Dan Williams Acked-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki --- arch/x86/include/asm/efi.h | 9 ++++++++- arch/x86/kernel/setup.c | 18 ++++++++---------- arch/x86/platform/efi/efi.c | 3 +++ arch/x86/platform/efi/quirks.c | 3 +++ drivers/firmware/efi/esrt.c | 3 +++ drivers/firmware/efi/fake_mem.c | 2 +- include/linux/efi.h | 1 - 7 files changed, 26 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 43a82e59c59d..45f853bce869 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -140,7 +140,6 @@ extern void efi_delete_dummy_variable(void); extern void efi_switch_mm(struct mm_struct *mm); extern void efi_recover_from_page_fault(unsigned long phys_addr); extern void efi_free_boot_services(void); -extern void efi_reserve_boot_services(void); struct efi_setup_data { u64 fw_vendor; @@ -244,6 +243,8 @@ static inline bool efi_is_64bit(void) extern bool efi_reboot_required(void); extern bool efi_is_table_address(unsigned long phys_addr); +extern void efi_find_mirror(void); +extern void efi_reserve_boot_services(void); #else static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {} static inline bool efi_reboot_required(void) @@ -254,6 +255,12 @@ static inline bool efi_is_table_address(unsigned long phys_addr) { return false; } +static inline void efi_find_mirror(void) +{ +} +static inline void efi_reserve_boot_services(void) +{ +} #endif /* CONFIG_EFI */ #endif /* _ASM_X86_EFI_H */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 77ea96b794bd..1c4b866bc184 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1122,17 +1122,15 @@ void __init setup_arch(char **cmdline_p) reserve_bios_regions(); - if (efi_enabled(EFI_MEMMAP)) { - efi_fake_memmap(); - efi_find_mirror(); - efi_esrt_init(); + efi_fake_memmap(); + efi_find_mirror(); + efi_esrt_init(); - /* - * The EFI specification says that boot service code won't be - * called after ExitBootServices(). This is, in fact, a lie. - */ - efi_reserve_boot_services(); - } + /* + * The EFI specification says that boot service code won't be + * called after ExitBootServices(). This is, in fact, a lie. + */ + efi_reserve_boot_services(); /* preallocate 4k for mptable mpc */ e820__memblock_alloc_reserved_mpc_new(); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 425e025341db..e6e41b118d68 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -128,6 +128,9 @@ void __init efi_find_mirror(void) efi_memory_desc_t *md; u64 mirror_size = 0, total_size = 0; + if (!efi_enabled(EFI_MEMMAP)) + return; + for_each_efi_memory_desc(md) { unsigned long long start = md->phys_addr; unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 3b9fd679cea9..7675cf754d90 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -320,6 +320,9 @@ void __init efi_reserve_boot_services(void) { efi_memory_desc_t *md; + if (!efi_enabled(EFI_MEMMAP)) + return; + for_each_efi_memory_desc(md) { u64 start = md->phys_addr; u64 size = md->num_pages << EFI_PAGE_SHIFT; diff --git a/drivers/firmware/efi/esrt.c b/drivers/firmware/efi/esrt.c index d6dd5f503fa2..2762e0662bf4 100644 --- a/drivers/firmware/efi/esrt.c +++ b/drivers/firmware/efi/esrt.c @@ -246,6 +246,9 @@ void __init efi_esrt_init(void) int rc; phys_addr_t end; + if (!efi_enabled(EFI_MEMMAP)) + return; + pr_debug("esrt-init: loading.\n"); if (!esrt_table_exists()) return; diff --git a/drivers/firmware/efi/fake_mem.c b/drivers/firmware/efi/fake_mem.c index 9501edc0fcfb..526b45331d96 100644 --- a/drivers/firmware/efi/fake_mem.c +++ b/drivers/firmware/efi/fake_mem.c @@ -44,7 +44,7 @@ void __init efi_fake_memmap(void) void *new_memmap; int i; - if (!nr_fake_mem) + if (!efi_enabled(EFI_MEMMAP) || !nr_fake_mem) return; /* count up the number of EFI memory descriptor */ diff --git a/include/linux/efi.h b/include/linux/efi.h index 78c75992b313..44c85b559e15 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1045,7 +1045,6 @@ extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if pos extern efi_status_t efi_query_variable_store(u32 attributes, unsigned long size, bool nonblocking); -extern void efi_find_mirror(void); #else static inline efi_status_t efi_query_variable_store(u32 attributes, -- cgit v1.2.3 From b617c5266eedbef2ccbb90931bb9175faa4ae0bc Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 6 Nov 2019 17:43:11 -0800 Subject: efi: Common enable/disable infrastructure for EFI soft reservation UEFI 2.8 defines an EFI_MEMORY_SP attribute bit to augment the interpretation of the EFI Memory Types as "reserved for a specific purpose". The proposed Linux behavior for specific purpose memory is that it is reserved for direct-access (device-dax) by default and not available for any kernel usage, not even as an OOM fallback. Later, through udev scripts or another init mechanism, these device-dax claimed ranges can be reconfigured and hot-added to the available System-RAM with a unique node identifier. This device-dax management scheme implements "soft" in the "soft reserved" designation by allowing some or all of the reservation to be recovered as typical memory. This policy can be disabled at compile-time with CONFIG_EFI_SOFT_RESERVE=n, or runtime with efi=nosoftreserve. As for this patch, define the common helpers to determine if the EFI_MEMORY_SP attribute should be honored. The determination needs to be made early to prevent the kernel from being loaded into soft-reserved memory, or otherwise allowing early allocations to land there. Follow-on changes are needed per architecture to leverage these helpers in their respective mem-init paths. Reviewed-by: Ard Biesheuvel Signed-off-by: Dan Williams Acked-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/kernel-parameters.txt | 9 ++++++++- drivers/firmware/efi/Kconfig | 21 +++++++++++++++++++++ drivers/firmware/efi/efi.c | 8 ++++++++ drivers/firmware/efi/libstub/efi-stub-helper.c | 19 +++++++++++++++++++ include/linux/efi.h | 14 ++++++++++++++ 5 files changed, 70 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a84a83f8881e..2359dc56d82c 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1168,7 +1168,8 @@ Format: {"off" | "on" | "skip[mbr]"} efi= [EFI] - Format: { "old_map", "nochunk", "noruntime", "debug" } + Format: { "old_map", "nochunk", "noruntime", "debug", + "nosoftreserve" } old_map [X86-64]: switch to the old ioremap-based EFI runtime services mapping. 32-bit still uses this one by default. @@ -1177,6 +1178,12 @@ firmware implementations. noruntime : disable EFI runtime services support debug: enable misc debug output + nosoftreserve: The EFI_MEMORY_SP (Specific Purpose) + attribute may cause the kernel to reserve the + memory range for a memory mapping driver to + claim. Specify efi=nosoftreserve to disable this + reservation and treat the memory by its base type + (i.e. EFI_CONVENTIONAL_MEMORY / "System RAM"). efi_no_storage_paranoia [EFI; X86] Using this parameter you can use more than 50% of diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index b248870a9806..bcc378c19ebe 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig @@ -75,6 +75,27 @@ config EFI_MAX_FAKE_MEM Ranges can be set up to this value using comma-separated list. The default value is 8. +config EFI_SOFT_RESERVE + bool "Reserve EFI Specific Purpose Memory" + depends on EFI && EFI_STUB && ACPI_HMAT + default ACPI_HMAT + help + On systems that have mixed performance classes of memory EFI + may indicate specific purpose memory with an attribute (See + EFI_MEMORY_SP in UEFI 2.8). A memory range tagged with this + attribute may have unique performance characteristics compared + to the system's general purpose "System RAM" pool. On the + expectation that such memory has application specific usage, + and its base EFI memory type is "conventional" answer Y to + arrange for the kernel to reserve it as a "Soft Reserved" + resource, and set aside for direct-access (device-dax) by + default. The memory range can later be optionally assigned to + the page allocator by system administrator policy via the + device-dax kmem facility. Say N to have the kernel treat this + memory as "System RAM" by default. + + If unsure, say Y. + config EFI_PARAMS_FROM_FDT bool help diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index f8f8e273d809..e1cb915b45c6 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -81,6 +81,11 @@ bool efi_runtime_disabled(void) return disable_runtime; } +bool __pure __efi_soft_reserve_enabled(void) +{ + return !efi_enabled(EFI_MEM_NO_SOFT_RESERVE); +} + static int __init parse_efi_cmdline(char *str) { if (!str) { @@ -94,6 +99,9 @@ static int __init parse_efi_cmdline(char *str) if (parse_option_str(str, "noruntime")) disable_runtime = true; + if (parse_option_str(str, "nosoftreserve")) + set_bit(EFI_MEM_NO_SOFT_RESERVE, &efi.flags); + return 0; } early_param("efi", parse_efi_cmdline); diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index 35dbc2791c97..e02579907f2e 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -32,6 +32,7 @@ static unsigned long __chunk_size = EFI_READ_CHUNK_SIZE; static int __section(.data) __nokaslr; static int __section(.data) __quiet; static int __section(.data) __novamap; +static bool __section(.data) efi_nosoftreserve; int __pure nokaslr(void) { @@ -45,6 +46,10 @@ int __pure novamap(void) { return __novamap; } +bool __pure __efi_soft_reserve_enabled(void) +{ + return !efi_nosoftreserve; +} #define EFI_MMAP_NR_SLACK_SLOTS 8 @@ -211,6 +216,10 @@ again: if (desc->type != EFI_CONVENTIONAL_MEMORY) continue; + if (efi_soft_reserve_enabled() && + (desc->attribute & EFI_MEMORY_SP)) + continue; + if (desc->num_pages < nr_pages) continue; @@ -305,6 +314,10 @@ efi_status_t efi_low_alloc_above(efi_system_table_t *sys_table_arg, if (desc->type != EFI_CONVENTIONAL_MEMORY) continue; + if (efi_soft_reserve_enabled() && + (desc->attribute & EFI_MEMORY_SP)) + continue; + if (desc->num_pages < nr_pages) continue; @@ -484,6 +497,12 @@ efi_status_t efi_parse_options(char const *cmdline) __novamap = 1; } + if (IS_ENABLED(CONFIG_EFI_SOFT_RESERVE) && + !strncmp(str, "nosoftreserve", 7)) { + str += strlen("nosoftreserve"); + efi_nosoftreserve = 1; + } + /* Group words together, delimited by "," */ while (*str && *str != ' ' && *str != ',') str++; diff --git a/include/linux/efi.h b/include/linux/efi.h index 44c85b559e15..88654910ce29 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1202,6 +1202,7 @@ extern int __init efi_setup_pcdp_console(char *); #define EFI_DBG 8 /* Print additional debug info at runtime */ #define EFI_NX_PE_DATA 9 /* Can runtime data regions be mapped non-executable? */ #define EFI_MEM_ATTR 10 /* Did firmware publish an EFI_MEMORY_ATTRIBUTES table? */ +#define EFI_MEM_NO_SOFT_RESERVE 11 /* Is the kernel configured to ignore soft reservations? */ #ifdef CONFIG_EFI /* @@ -1212,6 +1213,14 @@ static inline bool efi_enabled(int feature) return test_bit(feature, &efi.flags) != 0; } extern void efi_reboot(enum reboot_mode reboot_mode, const char *__unused); + +bool __pure __efi_soft_reserve_enabled(void); + +static inline bool __pure efi_soft_reserve_enabled(void) +{ + return IS_ENABLED(CONFIG_EFI_SOFT_RESERVE) + && __efi_soft_reserve_enabled(); +} #else static inline bool efi_enabled(int feature) { @@ -1225,6 +1234,11 @@ efi_capsule_pending(int *reset_type) { return false; } + +static inline bool efi_soft_reserve_enabled(void) +{ + return false; +} #endif extern int efi_status_to_err(efi_status_t status); -- cgit v1.2.3 From 262b45ae3ab4bf8e2caf1fcfd0d8307897519630 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 6 Nov 2019 17:43:16 -0800 Subject: x86/efi: EFI soft reservation to E820 enumeration UEFI 2.8 defines an EFI_MEMORY_SP attribute bit to augment the interpretation of the EFI Memory Types as "reserved for a specific purpose". The proposed Linux behavior for specific purpose memory is that it is reserved for direct-access (device-dax) by default and not available for any kernel usage, not even as an OOM fallback. Later, through udev scripts or another init mechanism, these device-dax claimed ranges can be reconfigured and hot-added to the available System-RAM with a unique node identifier. This device-dax management scheme implements "soft" in the "soft reserved" designation by allowing some or all of the reservation to be recovered as typical memory. This policy can be disabled at compile-time with CONFIG_EFI_SOFT_RESERVE=n, or runtime with efi=nosoftreserve. This patch introduces 2 new concepts at once given the entanglement between early boot enumeration relative to memory that can optionally be reserved from the kernel page allocator by default. The new concepts are: - E820_TYPE_SOFT_RESERVED: Upon detecting the EFI_MEMORY_SP attribute on EFI_CONVENTIONAL memory, update the E820 map with this new type. Only perform this classification if the CONFIG_EFI_SOFT_RESERVE=y policy is enabled, otherwise treat it as typical ram. - IORES_DESC_SOFT_RESERVED: Add a new I/O resource descriptor for a device driver to search iomem resources for application specific memory. Teach the iomem code to identify such ranges as "Soft Reserved". Note that the comment for do_add_efi_memmap() needed refreshing since it seemed to imply that the efi map might overflow the e820 table, but that is not an issue as of commit 7b6e4ba3cb1f "x86/boot/e820: Clean up the E820_X_MAX definition" that removed the 128 entry limit for e820__range_add(). A follow-on change integrates parsing of the ACPI HMAT to identify the node and sub-range boundaries of EFI_MEMORY_SP designated memory. For now, just identify and reserve memory of this type. Acked-by: Ard Biesheuvel Reported-by: kbuild test robot Reviewed-by: Dave Hansen Signed-off-by: Dan Williams Acked-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki --- arch/x86/boot/compressed/eboot.c | 6 ++++- arch/x86/boot/compressed/kaslr.c | 4 ++++ arch/x86/include/asm/e820/types.h | 8 +++++++ arch/x86/kernel/e820.c | 12 ++++++++-- arch/x86/platform/efi/efi.c | 49 +++++++++++++++++++++++++++++++++++---- include/linux/ioport.h | 1 + 6 files changed, 73 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 82bc60c8acb2..f2db8c5e4b06 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -554,7 +554,11 @@ setup_e820(struct boot_params *params, struct setup_data *e820ext, u32 e820ext_s case EFI_BOOT_SERVICES_CODE: case EFI_BOOT_SERVICES_DATA: case EFI_CONVENTIONAL_MEMORY: - e820_type = E820_TYPE_RAM; + if (efi_soft_reserve_enabled() && + (d->attribute & EFI_MEMORY_SP)) + e820_type = E820_TYPE_SOFT_RESERVED; + else + e820_type = E820_TYPE_RAM; break; case EFI_ACPI_MEMORY_NVS: diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 2e53c056ba20..ff6fa81949cd 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -760,6 +760,10 @@ process_efi_entries(unsigned long minimum, unsigned long image_size) if (md->type != EFI_CONVENTIONAL_MEMORY) continue; + if (efi_soft_reserve_enabled() && + (md->attribute & EFI_MEMORY_SP)) + continue; + if (efi_mirror_found && !(md->attribute & EFI_MEMORY_MORE_RELIABLE)) continue; diff --git a/arch/x86/include/asm/e820/types.h b/arch/x86/include/asm/e820/types.h index c3aa4b5e49e2..314f75d886d0 100644 --- a/arch/x86/include/asm/e820/types.h +++ b/arch/x86/include/asm/e820/types.h @@ -28,6 +28,14 @@ enum e820_type { */ E820_TYPE_PRAM = 12, + /* + * Special-purpose memory is indicated to the system via the + * EFI_MEMORY_SP attribute. Define an e820 translation of this + * memory type for the purpose of reserving this range and + * marking it with the IORES_DESC_SOFT_RESERVED designation. + */ + E820_TYPE_SOFT_RESERVED = 0xefffffff, + /* * Reserved RAM used by the kernel itself if * CONFIG_INTEL_TXT=y is enabled, memory of this type diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 7da2bcd2b8eb..9976106b57ec 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -190,6 +190,7 @@ static void __init e820_print_type(enum e820_type type) case E820_TYPE_RAM: /* Fall through: */ case E820_TYPE_RESERVED_KERN: pr_cont("usable"); break; case E820_TYPE_RESERVED: pr_cont("reserved"); break; + case E820_TYPE_SOFT_RESERVED: pr_cont("soft reserved"); break; case E820_TYPE_ACPI: pr_cont("ACPI data"); break; case E820_TYPE_NVS: pr_cont("ACPI NVS"); break; case E820_TYPE_UNUSABLE: pr_cont("unusable"); break; @@ -1037,6 +1038,7 @@ static const char *__init e820_type_to_string(struct e820_entry *entry) case E820_TYPE_PRAM: return "Persistent Memory (legacy)"; case E820_TYPE_PMEM: return "Persistent Memory"; case E820_TYPE_RESERVED: return "Reserved"; + case E820_TYPE_SOFT_RESERVED: return "Soft Reserved"; default: return "Unknown E820 type"; } } @@ -1052,6 +1054,7 @@ static unsigned long __init e820_type_to_iomem_type(struct e820_entry *entry) case E820_TYPE_PRAM: /* Fall-through: */ case E820_TYPE_PMEM: /* Fall-through: */ case E820_TYPE_RESERVED: /* Fall-through: */ + case E820_TYPE_SOFT_RESERVED: /* Fall-through: */ default: return IORESOURCE_MEM; } } @@ -1064,6 +1067,7 @@ static unsigned long __init e820_type_to_iores_desc(struct e820_entry *entry) case E820_TYPE_PMEM: return IORES_DESC_PERSISTENT_MEMORY; case E820_TYPE_PRAM: return IORES_DESC_PERSISTENT_MEMORY_LEGACY; case E820_TYPE_RESERVED: return IORES_DESC_RESERVED; + case E820_TYPE_SOFT_RESERVED: return IORES_DESC_SOFT_RESERVED; case E820_TYPE_RESERVED_KERN: /* Fall-through: */ case E820_TYPE_RAM: /* Fall-through: */ case E820_TYPE_UNUSABLE: /* Fall-through: */ @@ -1078,11 +1082,12 @@ static bool __init do_mark_busy(enum e820_type type, struct resource *res) return true; /* - * Treat persistent memory like device memory, i.e. reserve it - * for exclusive use of a driver + * Treat persistent memory and other special memory ranges like + * device memory, i.e. reserve it for exclusive use of a driver */ switch (type) { case E820_TYPE_RESERVED: + case E820_TYPE_SOFT_RESERVED: case E820_TYPE_PRAM: case E820_TYPE_PMEM: return false; @@ -1285,6 +1290,9 @@ void __init e820__memblock_setup(void) if (end != (resource_size_t)end) continue; + if (entry->type == E820_TYPE_SOFT_RESERVED) + memblock_reserve(entry->addr, entry->size); + if (entry->type != E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN) continue; diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index e6e41b118d68..8609dccea096 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -148,14 +148,18 @@ void __init efi_find_mirror(void) /* * Tell the kernel about the EFI memory map. This might include - * more than the max 128 entries that can fit in the e820 legacy - * (zeropage) memory map. + * more than the max 128 entries that can fit in the passed in e820 + * legacy (zeropage) memory map, but the kernel's e820 table can hold + * E820_MAX_ENTRIES. */ static void __init do_add_efi_memmap(void) { efi_memory_desc_t *md; + if (!efi_enabled(EFI_MEMMAP)) + return; + for_each_efi_memory_desc(md) { unsigned long long start = md->phys_addr; unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; @@ -167,7 +171,10 @@ static void __init do_add_efi_memmap(void) case EFI_BOOT_SERVICES_CODE: case EFI_BOOT_SERVICES_DATA: case EFI_CONVENTIONAL_MEMORY: - if (md->attribute & EFI_MEMORY_WB) + if (efi_soft_reserve_enabled() + && (md->attribute & EFI_MEMORY_SP)) + e820_type = E820_TYPE_SOFT_RESERVED; + else if (md->attribute & EFI_MEMORY_WB) e820_type = E820_TYPE_RAM; else e820_type = E820_TYPE_RESERVED; @@ -193,11 +200,36 @@ static void __init do_add_efi_memmap(void) e820_type = E820_TYPE_RESERVED; break; } + e820__range_add(start, size, e820_type); } e820__update_table(e820_table); } +/* + * Given add_efi_memmap defaults to 0 and there there is no alternative + * e820 mechanism for soft-reserved memory, import the full EFI memory + * map if soft reservations are present and enabled. Otherwise, the + * mechanism to disable the kernel's consideration of EFI_MEMORY_SP is + * the efi=nosoftreserve option. + */ +static bool do_efi_soft_reserve(void) +{ + efi_memory_desc_t *md; + + if (!efi_enabled(EFI_MEMMAP)) + return false; + + if (!efi_soft_reserve_enabled()) + return false; + + for_each_efi_memory_desc(md) + if (md->type == EFI_CONVENTIONAL_MEMORY && + (md->attribute & EFI_MEMORY_SP)) + return true; + return false; +} + int __init efi_memblock_x86_reserve_range(void) { struct efi_info *e = &boot_params.efi_info; @@ -227,7 +259,7 @@ int __init efi_memblock_x86_reserve_range(void) if (rv) return rv; - if (add_efi_memmap) + if (add_efi_memmap || do_efi_soft_reserve()) do_add_efi_memmap(); WARN(efi.memmap.desc_version != 1, @@ -781,6 +813,15 @@ static bool should_map_region(efi_memory_desc_t *md) if (IS_ENABLED(CONFIG_X86_32)) return false; + /* + * EFI specific purpose memory may be reserved by default + * depending on kernel config and boot options. + */ + if (md->type == EFI_CONVENTIONAL_MEMORY && + efi_soft_reserve_enabled() && + (md->attribute & EFI_MEMORY_SP)) + return false; + /* * Map all of RAM so that we can access arguments in the 1:1 * mapping when making EFI runtime calls. diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 7bddddfc76d6..a9b9170b5dd2 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -134,6 +134,7 @@ enum { IORES_DESC_PERSISTENT_MEMORY_LEGACY = 5, IORES_DESC_DEVICE_PRIVATE_MEMORY = 6, IORES_DESC_RESERVED = 7, + IORES_DESC_SOFT_RESERVED = 8, }; /* -- cgit v1.2.3 From 33dd70752cd76f4d883a165a674f13121a4155ed Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 6 Nov 2019 17:43:31 -0800 Subject: lib: Uplevel the pmem "region" ida to a global allocator In preparation for handling platform differentiated memory types beyond persistent memory, uplevel the "region" identifier to a global number space. This enables a device-dax instance to be registered to any memory type with guaranteed unique names. Signed-off-by: Dan Williams Acked-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki --- drivers/nvdimm/Kconfig | 1 + drivers/nvdimm/core.c | 1 - drivers/nvdimm/nd-core.h | 1 - drivers/nvdimm/region_devs.c | 13 ++++--------- include/linux/memregion.h | 19 +++++++++++++++++++ lib/Kconfig | 3 +++ lib/Makefile | 1 + lib/memregion.c | 18 ++++++++++++++++++ 8 files changed, 46 insertions(+), 11 deletions(-) create mode 100644 include/linux/memregion.h create mode 100644 lib/memregion.c (limited to 'include') diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig index 36af7af6b7cf..b7d1eb38b27d 100644 --- a/drivers/nvdimm/Kconfig +++ b/drivers/nvdimm/Kconfig @@ -4,6 +4,7 @@ menuconfig LIBNVDIMM depends on PHYS_ADDR_T_64BIT depends on HAS_IOMEM depends on BLK_DEV + select MEMREGION help Generic support for non-volatile memory devices including ACPI-6-NFIT defined resources. On platforms that define an diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 9204f1e9fd14..e592c4964674 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -455,7 +455,6 @@ static __exit void libnvdimm_exit(void) nd_region_exit(); nvdimm_exit(); nvdimm_bus_exit(); - nd_region_devs_exit(); nvdimm_devs_exit(); } diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 25fa121104d0..aa059439fca0 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -114,7 +114,6 @@ struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev); int __init nvdimm_bus_init(void); void nvdimm_bus_exit(void); void nvdimm_devs_exit(void); -void nd_region_devs_exit(void); struct nd_region; void nd_region_advance_seeds(struct nd_region *nd_region, struct device *dev); void nd_region_create_ns_seed(struct nd_region *nd_region); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index ef423ba1a711..fbf34cf688f4 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -3,6 +3,7 @@ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. */ #include +#include #include #include #include @@ -19,7 +20,6 @@ */ #include -static DEFINE_IDA(region_ida); static DEFINE_PER_CPU(int, flush_idx); static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm, @@ -133,7 +133,7 @@ static void nd_region_release(struct device *dev) put_device(&nvdimm->dev); } free_percpu(nd_region->lane); - ida_simple_remove(®ion_ida, nd_region->id); + memregion_free(nd_region->id); if (is_nd_blk(dev)) kfree(to_nd_blk_region(dev)); else @@ -985,7 +985,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, if (!region_buf) return NULL; - nd_region->id = ida_simple_get(®ion_ida, 0, 0, GFP_KERNEL); + nd_region->id = memregion_alloc(GFP_KERNEL); if (nd_region->id < 0) goto err_id; @@ -1044,7 +1044,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, return nd_region; err_percpu: - ida_simple_remove(®ion_ida, nd_region->id); + memregion_free(nd_region->id); err_id: kfree(region_buf); return NULL; @@ -1216,8 +1216,3 @@ int nd_region_conflict(struct nd_region *nd_region, resource_size_t start, return device_for_each_child(&nvdimm_bus->dev, &ctx, region_conflict); } - -void __exit nd_region_devs_exit(void) -{ - ida_destroy(®ion_ida); -} diff --git a/include/linux/memregion.h b/include/linux/memregion.h new file mode 100644 index 000000000000..7de7c0a1444e --- /dev/null +++ b/include/linux/memregion.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _MEMREGION_H_ +#define _MEMREGION_H_ +#include +#include + +#ifdef CONFIG_MEMREGION +int memregion_alloc(gfp_t gfp); +void memregion_free(int id); +#else +static inline int memregion_alloc(gfp_t gfp) +{ + return -ENOMEM; +} +void memregion_free(int id) +{ +} +#endif +#endif /* _MEMREGION_H_ */ diff --git a/lib/Kconfig b/lib/Kconfig index 183f92a297ca..0dc043ac271d 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -606,6 +606,9 @@ config ARCH_NO_SG_CHAIN config ARCH_HAS_PMEM_API bool +config MEMREGION + bool + # use memcpy to implement user copies for nommu architectures config UACCESS_MEMCPY bool diff --git a/lib/Makefile b/lib/Makefile index c5892807e06f..2fb7b47018f1 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -212,6 +212,7 @@ obj-$(CONFIG_GENERIC_NET_UTILS) += net_utils.o obj-$(CONFIG_SG_SPLIT) += sg_split.o obj-$(CONFIG_SG_POOL) += sg_pool.o +obj-$(CONFIG_MEMREGION) += memregion.o obj-$(CONFIG_STMP_DEVICE) += stmp_device.o obj-$(CONFIG_IRQ_POLL) += irq_poll.o diff --git a/lib/memregion.c b/lib/memregion.c new file mode 100644 index 000000000000..77c85b5251da --- /dev/null +++ b/lib/memregion.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* identifiers for device / performance-differentiated memory regions */ +#include +#include + +static DEFINE_IDA(memregion_ids); + +int memregion_alloc(gfp_t gfp) +{ + return ida_alloc(&memregion_ids, gfp); +} +EXPORT_SYMBOL(memregion_alloc); + +void memregion_free(int id) +{ + ida_free(&memregion_ids, id); +} +EXPORT_SYMBOL(memregion_free); -- cgit v1.2.3 From a6c7f4c6aea5f4ca6056b06cec7ebd79f8c23e33 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 6 Nov 2019 17:43:43 -0800 Subject: device-dax: Add a driver for "hmem" devices Platform firmware like EFI/ACPI may publish "hmem" platform devices. Such a device is a performance differentiated memory range likely reserved for an application specific use case. The driver gives access to 100% of the capacity via a device-dax mmap instance by default. However, if over-subscription and other kernel memory management is desired the resulting dax device can be assigned to the core-mm via the kmem driver. This consumes "hmem" devices the producer of "hmem" devices is saved for a follow-on patch so that it can reference the new CONFIG_DEV_DAX_HMEM symbol to gate performing the enumeration work. Reported-by: kbuild test robot Reviewed-by: Dave Hansen Signed-off-by: Dan Williams Acked-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki --- drivers/dax/Kconfig | 27 ++++++++++++++++++----- drivers/dax/Makefile | 2 ++ drivers/dax/hmem.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/memregion.h | 4 ++++ 4 files changed, 84 insertions(+), 5 deletions(-) create mode 100644 drivers/dax/hmem.c (limited to 'include') diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig index f33c73e4af41..3b6c06f07326 100644 --- a/drivers/dax/Kconfig +++ b/drivers/dax/Kconfig @@ -32,19 +32,36 @@ config DEV_DAX_PMEM Say M if unsure +config DEV_DAX_HMEM + tristate "HMEM DAX: direct access to 'specific purpose' memory" + depends on EFI_SOFT_RESERVE + default DEV_DAX + help + EFI 2.8 platforms, and others, may advertise 'specific purpose' + memory. For example, a high bandwidth memory pool. The + indication from platform firmware is meant to reserve the + memory from typical usage by default. This driver creates + device-dax instances for these memory ranges, and that also + enables the possibility to assign them to the DEV_DAX_KMEM + driver to override the reservation and add them to kernel + "System RAM" pool. + + Say M if unsure. + config DEV_DAX_KMEM tristate "KMEM DAX: volatile-use of persistent memory" default DEV_DAX depends on DEV_DAX depends on MEMORY_HOTPLUG # for add_memory() and friends help - Support access to persistent memory as if it were RAM. This - allows easier use of persistent memory by unmodified - applications. + Support access to persistent, or other performance + differentiated memory as if it were System RAM. This allows + easier use of persistent memory by unmodified applications, or + adds core kernel memory services to heterogeneous memory types + (HMEM) marked "reserved" by platform firmware. To use this feature, a DAX device must be unbound from the - device_dax driver (PMEM DAX) and bound to this kmem driver - on each boot. + device_dax driver and bound to this kmem driver on each boot. Say N if unsure. diff --git a/drivers/dax/Makefile b/drivers/dax/Makefile index 81f7d54dadfb..80065b38b3c4 100644 --- a/drivers/dax/Makefile +++ b/drivers/dax/Makefile @@ -2,9 +2,11 @@ obj-$(CONFIG_DAX) += dax.o obj-$(CONFIG_DEV_DAX) += device_dax.o obj-$(CONFIG_DEV_DAX_KMEM) += kmem.o +obj-$(CONFIG_DEV_DAX_HMEM) += dax_hmem.o dax-y := super.o dax-y += bus.o device_dax-y := device.o +dax_hmem-y := hmem.o obj-y += pmem/ diff --git a/drivers/dax/hmem.c b/drivers/dax/hmem.c new file mode 100644 index 000000000000..fe7214daf62e --- /dev/null +++ b/drivers/dax/hmem.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include "bus.h" + +static int dax_hmem_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct dev_pagemap pgmap = { }; + struct dax_region *dax_region; + struct memregion_info *mri; + struct dev_dax *dev_dax; + struct resource *res; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENOMEM; + + mri = dev->platform_data; + memcpy(&pgmap.res, res, sizeof(*res)); + + dax_region = alloc_dax_region(dev, pdev->id, res, mri->target_node, + PMD_SIZE, PFN_DEV|PFN_MAP); + if (!dax_region) + return -ENOMEM; + + dev_dax = devm_create_dev_dax(dax_region, 0, &pgmap); + if (IS_ERR(dev_dax)) + return PTR_ERR(dev_dax); + + /* child dev_dax instances now own the lifetime of the dax_region */ + dax_region_put(dax_region); + return 0; +} + +static int dax_hmem_remove(struct platform_device *pdev) +{ + /* devm handles teardown */ + return 0; +} + +static struct platform_driver dax_hmem_driver = { + .probe = dax_hmem_probe, + .remove = dax_hmem_remove, + .driver = { + .name = "hmem", + }, +}; + +module_platform_driver(dax_hmem_driver); + +MODULE_ALIAS("platform:hmem*"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Intel Corporation"); diff --git a/include/linux/memregion.h b/include/linux/memregion.h index 7de7c0a1444e..e11595256cac 100644 --- a/include/linux/memregion.h +++ b/include/linux/memregion.h @@ -4,6 +4,10 @@ #include #include +struct memregion_info { + int target_node; +}; + #ifdef CONFIG_MEMREGION int memregion_alloc(gfp_t gfp); void memregion_free(int id); -- cgit v1.2.3