powerpc/eeh: Defer printing stack trace

Currently we print a stack trace in the event handler to help with debugging EEH issues. In the case of surprise hot-unplug this is unneeded, so we want to prevent printing the stack trace unless we know it's due to an actual device error. To accomplish this, we can save a stack trace at the point of detection and only print it once the EEH recovery handler has determined the freeze was due to an actual error. Since the whole point of this is to prevent spurious EEH output, we also move a few prints out of the detection thread, or mark them as pr_debug, so anyone interested can get output from eeh_check_dev_failure() if they want. Signed-off-by: Oliver O'Halloran <oohall@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20190903101605.2890-6-oohall@gmail.com
author: Oliver O'Halloran <oohall@gmail.com> 2019-09-03 20:15:56 +1000
committer: Michael Ellerman <mpe@ellerman.id.au> 2019-09-05 14:22:38 +1000
commit: 25baf3d81614b0b8ca8958f4d6f111ccaaaad578 (patch)
tree: 6c8e859c08e05b14b1c564282563f95b91da2d86 /arch/powerpc/kernel/eeh_driver.c
parent: b104af5a7687060792ca398bb86b033057afce75 (diff)
download: linux-25baf3d81614b0b8ca8958f4d6f111ccaaaad578.tar.bz2
1 files changed, 37 insertions, 1 deletions
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 52ce7584af43..0d34cc12c529 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -863,8 +863,44 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 			if (eeh_slot_presence_check(edev->pdev))
 				devices++;
 
-	if (!devices)
+	if (!devices) {
+		pr_debug("EEH: Frozen PHB#%x-PE#%x is empty!\n",
+			pe->phb->global_number, pe->addr);
 		goto out; /* nothing to recover */
+	}
+
+	/* Log the event */
+	if (pe->type & EEH_PE_PHB) {
+		pr_err("EEH: PHB#%x failure detected, location: %s\n",
+			pe->phb->global_number, eeh_pe_loc_get(pe));
+	} else {
+		struct eeh_pe *phb_pe = eeh_phb_pe_get(pe->phb);
+
+		pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
+		       pe->phb->global_number, pe->addr);
+		pr_err("EEH: PE location: %s, PHB location: %s\n",
+		       eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
+	}
+
+	/*
+	 * Print the saved stack trace now that we've verified there's
+	 * something to recover.
+	 */
+	if (pe->trace_entries) {
+		void **ptrs = (void **) pe->stack_trace;
+		int i;
+
+		pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
+		       pe->phb->global_number, pe->addr);
+
+		/* FIXME: Use the same format as dump_stack() */
+		pr_err("EEH: Call Trace:\n");
+		for (i = 0; i < pe->trace_entries; i++)
+			pr_err("EEH: [%pK] %pS\n", ptrs[i], ptrs[i]);
+
+		pe->trace_entries = 0;
+	}
+
 
 	eeh_pe_update_time_stamp(pe);
 	pe->freeze_count++;
author	Oliver O'Halloran <oohall@gmail.com>	2019-09-03 20:15:56 +1000
committer	Michael Ellerman <mpe@ellerman.id.au>	2019-09-05 14:22:38 +1000
commit	25baf3d81614b0b8ca8958f4d6f111ccaaaad578 (patch)
tree	6c8e859c08e05b14b1c564282563f95b91da2d86 /arch/powerpc/kernel/eeh_driver.c
parent	b104af5a7687060792ca398bb86b033057afce75 (diff)
download	linux-25baf3d81614b0b8ca8958f4d6f111ccaaaad578.tar.bz2