From 2d2feb6813171c6a0071148239845ee251a7d164 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Mon, 4 May 2020 17:46:54 -0700 Subject: ACPICA: Update version to 20200430 ACPICA commit c00a5cd99fa3fe6cd053a2a1a557e54b4fea26f7 Version 20200430. Link: https://github.com/acpica/acpica/commit/c00a5cd9 Signed-off-by: Bob Moore Signed-off-by: Erik Kaneda Signed-off-by: Rafael J. Wysocki --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 49b519f36b69..1dc8d262035b 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -12,7 +12,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20200326 +#define ACPI_CA_VERSION 0x20200430 #include #include -- cgit v1.2.3 From 132565d8ec096a5a043f96092cfa4821d970d268 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 7 May 2020 12:49:16 +0200 Subject: ACPI: utils: Add acpi_evaluate_reg() helper With a recent fix to the pinctrl-cherryview driver we now have 2 drivers open-coding the parameter building / passing for calling _REG on an ACPI handle. Add a helper for this, so that these 2 drivers can be converted to this helper. Suggested-by: Andy Shevchenko Reviewed-by: Andy Shevchenko Signed-off-by: Hans de Goede Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/utils.c | 25 +++++++++++++++++++++++++ include/acpi/acpi_bus.h | 1 + 2 files changed, 26 insertions(+) (limited to 'include') diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c index 804ac0df58ec..838b719ec7ce 100644 --- a/drivers/acpi/utils.c +++ b/drivers/acpi/utils.c @@ -605,6 +605,31 @@ acpi_status acpi_evaluate_lck(acpi_handle handle, int lock) return status; } +/** + * acpi_evaluate_reg: Evaluate _REG method to register OpRegion presence + * @handle: ACPI device handle + * @space_id: ACPI address space id to register OpRegion presence for + * @function: Parameter to pass to _REG one of ACPI_REG_CONNECT or + * ACPI_REG_DISCONNECT + * + * Evaluate device's _REG method to register OpRegion presence. + */ +acpi_status acpi_evaluate_reg(acpi_handle handle, u8 space_id, u32 function) +{ + struct acpi_object_list arg_list; + union acpi_object params[2]; + + params[0].type = ACPI_TYPE_INTEGER; + params[0].integer.value = space_id; + params[1].type = ACPI_TYPE_INTEGER; + params[1].integer.value = function; + arg_list.count = 2; + arg_list.pointer = params; + + return acpi_evaluate_object(handle, "_REG", &arg_list, NULL); +} +EXPORT_SYMBOL(acpi_evaluate_reg); + /** * acpi_evaluate_dsm - evaluate device's _DSM method * @handle: ACPI device handle diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index a92bea7184a8..5afb6ceb284f 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -44,6 +44,7 @@ acpi_status acpi_execute_simple_method(acpi_handle handle, char *method, u64 arg); acpi_status acpi_evaluate_ej0(acpi_handle handle); acpi_status acpi_evaluate_lck(acpi_handle handle, int lock); +acpi_status acpi_evaluate_reg(acpi_handle handle, u8 space_id, u32 function); bool acpi_ata_match(acpi_handle handle); bool acpi_bay_match(acpi_handle handle); bool acpi_dock_match(acpi_handle handle); -- cgit v1.2.3 From 062022315e8ad9e0628515dfc756ab54b5fdb26b Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 1 May 2020 
17:45:41 +0100 Subject: mm/memory-failure: Add memory_failure_queue_kick() The GHES code calls memory_failure_queue() from IRQ context to schedule work on the current CPU so that memory_failure() can sleep. For synchronous memory errors the arch code needs to know any signals that memory_failure() will trigger are pending before it returns to user-space, possibly when exiting from the IRQ. Add a helper to kick the memory failure queue, to ensure the scheduled work has happened. This has to be called from process context, so may have been migrated from the original cpu. Pass the cpu the work was queued on. Change memory_failure_work_func() to permit being called on the 'wrong' cpu. Signed-off-by: James Morse Tested-by: Tyler Baicar Acked-by: Naoya Horiguchi Signed-off-by: Rafael J. Wysocki --- include/linux/mm.h | 1 + mm/memory-failure.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 5a323422d783..c606dbbfa5e1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3012,6 +3012,7 @@ enum mf_flags { }; extern int memory_failure(unsigned long pfn, int flags); extern void memory_failure_queue(unsigned long pfn, int flags); +extern void memory_failure_queue_kick(int cpu); extern int unpoison_memory(unsigned long pfn); extern int get_hwpoison_page(struct page *page); #define put_hwpoison_page(page) put_page(page) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index a96364be8ab4..c4afb407bf0f 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1493,7 +1493,7 @@ static void memory_failure_work_func(struct work_struct *work) unsigned long proc_flags; int gotten; - mf_cpu = this_cpu_ptr(&memory_failure_cpu); + mf_cpu = container_of(work, struct memory_failure_cpu, work); for (;;) { spin_lock_irqsave(&mf_cpu->lock, proc_flags); gotten = kfifo_get(&mf_cpu->fifo, &entry); @@ -1507,6 +1507,19 @@ static void memory_failure_work_func(struct work_struct 
*work) } } +/* + * Process memory_failure work queued on the specified CPU. + * Used to avoid return-to-userspace racing with the memory_failure workqueue. + */ +void memory_failure_queue_kick(int cpu) +{ + struct memory_failure_cpu *mf_cpu; + + mf_cpu = &per_cpu(memory_failure_cpu, cpu); + cancel_work_sync(&mf_cpu->work); + memory_failure_work_func(&mf_cpu->work); +} + static int __init memory_failure_init(void) { struct memory_failure_cpu *mf_cpu; -- cgit v1.2.3 From 7f17b4a121d0d50eca22cb1edebf0a157f3e43bf Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 1 May 2020 17:45:42 +0100 Subject: ACPI: APEI: Kick the memory_failure() queue for synchronous errors memory_failure() offlines or repairs pages of memory that have been discovered to be corrupt. These may be detected by an external component (e.g. the memory controller), and notified via an IRQ. In this case the work is queued as not all of memory_failure()'s work can happen in IRQ context. If the error was detected as a result of user-space accessing a corrupt memory location the CPU may take an abort instead. On arm64 this is a 'synchronous external abort', and on a firmware-first system it is replayed using NOTIFY_SEA. This notification has NMI-like properties (it can interrupt IRQ-masked code), so the memory_failure() work is queued. If we return to user-space before the queued memory_failure() work is processed, we will take the fault again. This loop may cause platform firmware to exceed some threshold and reboot when Linux could have recovered from this error. For NMI-like notifications keep track of whether memory_failure() work was queued, and make task_work pending to flush out the queue. To save memory allocations, the task_work is allocated as part of the ghes_estatus_node, and free()ing it back to the pool is deferred. Signed-off-by: James Morse Tested-by: Tyler Baicar Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/apei/ghes.c | 67 ++++++++++++++++++++++++++++++++++++++++-------- include/acpi/ghes.h | 3 +++ 2 files changed, 59 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 24c9642e8fc7..5abca09455ad 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -414,23 +415,46 @@ static void ghes_clear_estatus(struct ghes *ghes, ghes_ack_error(ghes->generic_v2); } -static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev) +/* + * Called as task_work before returning to user-space. + * Ensure any queued work has been done before we return to the context that + * triggered the notification. + */ +static void ghes_kick_task_work(struct callback_head *head) +{ + struct acpi_hest_generic_status *estatus; + struct ghes_estatus_node *estatus_node; + u32 node_len; + + estatus_node = container_of(head, struct ghes_estatus_node, task_work); + if (IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE)) + memory_failure_queue_kick(estatus_node->task_work_cpu); + + estatus = GHES_ESTATUS_FROM_NODE(estatus_node); + node_len = GHES_ESTATUS_NODE_LEN(cper_estatus_len(estatus)); + gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, node_len); +} + +static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, + int sev) { -#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE unsigned long pfn; int flags = -1; int sec_sev = ghes_severity(gdata->error_severity); struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata); + if (!IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE)) + return false; + if (!(mem_err->validation_bits & CPER_MEM_VALID_PA)) - return; + return false; pfn = mem_err->physical_addr >> PAGE_SHIFT; if (!pfn_valid(pfn)) { pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid address in generic error data: %#llx\n", mem_err->physical_addr); - return; + return false; } /* iff 
following two events can be handled properly by now */ @@ -440,9 +464,12 @@ static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE) flags = 0; - if (flags != -1) + if (flags != -1) { memory_failure_queue(pfn, flags); -#endif + return true; + } + + return false; } /* @@ -490,7 +517,7 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata) #endif } -static void ghes_do_proc(struct ghes *ghes, +static bool ghes_do_proc(struct ghes *ghes, const struct acpi_hest_generic_status *estatus) { int sev, sec_sev; @@ -498,6 +525,7 @@ static void ghes_do_proc(struct ghes *ghes, guid_t *sec_type; const guid_t *fru_id = &guid_null; char *fru_text = ""; + bool queued = false; sev = ghes_severity(estatus->error_severity); apei_estatus_for_each_section(estatus, gdata) { @@ -515,7 +543,7 @@ static void ghes_do_proc(struct ghes *ghes, ghes_edac_report_mem_error(sev, mem_err); arch_apei_report_mem_error(sev, mem_err); - ghes_handle_memory_failure(gdata, sev); + queued = ghes_handle_memory_failure(gdata, sev); } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) { ghes_handle_aer(gdata); @@ -532,6 +560,8 @@ static void ghes_do_proc(struct ghes *ghes, gdata->error_data_length); } } + + return queued; } static void __ghes_print_estatus(const char *pfx, @@ -827,7 +857,9 @@ static void ghes_proc_in_irq(struct irq_work *irq_work) struct ghes_estatus_node *estatus_node; struct acpi_hest_generic *generic; struct acpi_hest_generic_status *estatus; + bool task_work_pending; u32 len, node_len; + int ret; llnode = llist_del_all(&ghes_estatus_llist); /* @@ -842,14 +874,26 @@ static void ghes_proc_in_irq(struct irq_work *irq_work) estatus = GHES_ESTATUS_FROM_NODE(estatus_node); len = cper_estatus_len(estatus); node_len = GHES_ESTATUS_NODE_LEN(len); - ghes_do_proc(estatus_node->ghes, estatus); + task_work_pending = ghes_do_proc(estatus_node->ghes, estatus); if (!ghes_estatus_cached(estatus)) { generic 
= estatus_node->generic; if (ghes_print_estatus(NULL, generic, estatus)) ghes_estatus_cache_add(generic, estatus); } - gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, - node_len); + + if (task_work_pending && current->mm != &init_mm) { + estatus_node->task_work.func = ghes_kick_task_work; + estatus_node->task_work_cpu = smp_processor_id(); + ret = task_work_add(current, &estatus_node->task_work, + true); + if (ret) + estatus_node->task_work.func = NULL; + } + + if (!estatus_node->task_work.func) + gen_pool_free(ghes_estatus_pool, + (unsigned long)estatus_node, node_len); + llnode = next; } } @@ -909,6 +953,7 @@ static int ghes_in_nmi_queue_one_entry(struct ghes *ghes, estatus_node->ghes = ghes; estatus_node->generic = ghes->generic; + estatus_node->task_work.func = NULL; estatus = GHES_ESTATUS_FROM_NODE(estatus_node); if (__ghes_read_estatus(estatus, buf_paddr, fixmap_idx, len)) { diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index e3f1cddb4ac8..517a5231cc1b 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -33,6 +33,9 @@ struct ghes_estatus_node { struct llist_node llnode; struct acpi_hest_generic *generic; struct ghes *ghes; + + int task_work_cpu; + struct callback_head task_work; }; struct ghes_estatus_cache { -- cgit v1.2.3