From 0d923820c6db1644c27c2d0a5af8920fc0f8cd81 Mon Sep 17 00:00:00 2001 From: Anju T Sudhakar Date: Wed, 4 Oct 2017 12:20:52 +0530 Subject: powerpc/perf: Fix for core/nest imc call trace on cpuhotplug Nest/core pmu units are enabled only when it is used. A reference count is maintained for the events which uses the nest/core pmu units. Currently in *_imc_counters_release function a WARN() is used for notification of any underflow of ref count. The case where event ref count hit a negative value is, when perf session is started, followed by offlining of all cpus in a given core. i.e. in cpuhotplug offline path ppc_core_imc_cpu_offline() function set the ref->count to zero, if the current cpu which is about to offline is the last cpu in a given core and make an OPAL call to disable the engine in that core. And on perf session termination, perf->destroy (core_imc_counters_release) will first decrement the ref->count for this core and based on the ref->count value an opal call is made to disable the core-imc engine. Now, since cpuhotplug path already clears the ref->count for core and disabled the engine, perf->destroy() decrementing again at event termination make it negative which in turn fires the WARN_ON. The same happens for nest units. Add a check to see if the reference count is alreday zero, before decrementing the count, so that the ref count will not hit a negative value. Signed-off-by: Anju T Sudhakar Reviewed-by: Santosh Sivaraj Signed-off-by: Michael Ellerman --- arch/powerpc/perf/imc-pmu.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'arch/powerpc/perf') diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 9ccac86f3463..e3a1f65933b5 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -399,6 +399,20 @@ static void nest_imc_counters_release(struct perf_event *event) /* Take the mutex lock for this node and then decrement the reference count */ mutex_lock(&ref->lock); + if (ref->refc == 0) { + /* + * The scenario where this is true is, when perf session is + * started, followed by offlining of all cpus in a given node. + * + * In the cpuhotplug offline path, ppc_nest_imc_cpu_offline() + * function set the ref->count to zero, if the cpu which is + * about to offline is the last cpu in a given node and make + * an OPAL call to disable the engine in that node. + * + */ + mutex_unlock(&ref->lock); + return; + } ref->refc--; if (ref->refc == 0) { rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST, @@ -646,6 +660,20 @@ static void core_imc_counters_release(struct perf_event *event) return; mutex_lock(&ref->lock); + if (ref->refc == 0) { + /* + * The scenario where this is true is, when perf session is + * started, followed by offlining of all cpus in a given core. + * + * In the cpuhotplug offline path, ppc_core_imc_cpu_offline() + * function set the ref->count to zero, if the cpu which is + * about to offline is the last cpu in a given core and make + * an OPAL call to disable the engine in that core. + * + */ + mutex_unlock(&ref->lock); + return; + } ref->refc--; if (ref->refc == 0) { rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, -- cgit v1.2.3 From cd4f2b30e5ef7d4bde61eb515372d96e8aec1690 Mon Sep 17 00:00:00 2001 From: Anju T Sudhakar Date: Wed, 11 Oct 2017 18:27:39 +0530 Subject: powerpc/perf: Add ___GFP_NOWARN flag to alloc_pages_node() Stack trace output during a stress test: [ 4.310049] Freeing initrd memory: 22592K [ 4.310646] rtas_flash: no firmware flash support [ 4.313341] cpuhp/64: page allocation failure: order:0, mode:0x14480c0(GFP_KERNEL|__GFP_ZERO|__GFP_THISNODE), nodemask=(null) [ 4.313465] cpuhp/64 cpuset=/ mems_allowed=0 [ 4.313521] CPU: 64 PID: 392 Comm: cpuhp/64 Not tainted 4.11.0-39.el7a.ppc64le #1 [ 4.313588] Call Trace: [ 4.313622] [c000000f1fb1b8e0] [c000000000c09388] dump_stack+0xb0/0xf0 (unreliable) [ 4.313694] [c000000f1fb1b920] [c00000000030ef6c] warn_alloc+0x12c/0x1c0 [ 4.313753] [c000000f1fb1b9c0] [c00000000030ff68] __alloc_pages_nodemask+0xea8/0x1000 [ 4.313823] [c000000f1fb1bbb0] [c000000000113a8c] core_imc_mem_init+0xbc/0x1c0 [ 4.313892] [c000000f1fb1bc00] [c000000000113cdc] ppc_core_imc_cpu_online+0x14c/0x170 [ 4.313962] [c000000f1fb1bc90] [c000000000125758] cpuhp_invoke_callback+0x198/0x5d0 [ 4.314031] [c000000f1fb1bd00] [c00000000012782c] cpuhp_thread_fun+0x8c/0x3d0 [ 4.314101] [c000000f1fb1bd60] [c0000000001678d0] smpboot_thread_fn+0x290/0x2a0 [ 4.314169] [c000000f1fb1bdc0] [c00000000015ee78] kthread+0x168/0x1b0 [ 4.314229] [c000000f1fb1be30] [c00000000000b368] ret_from_kernel_thread+0x5c/0x74 [ 4.314313] Mem-Info: [ 4.314356] active_anon:0 inactive_anon:0 isolated_anon:0 core_imc_mem_init() at system boot use alloc_pages_node() to get memory and alloc_pages_node() throws this stack dump when tried to allocate memory from a node which has no memory behind it. Add a ___GFP_NOWARN flag in allocation request as a fix. Signed-off-by: Anju T Sudhakar Reported-by: Michael Ellerman Reported-by: Venkat R.B Signed-off-by: Michael Ellerman --- arch/powerpc/perf/imc-pmu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/perf') diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index e3a1f65933b5..39a0203fd8a5 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -537,8 +537,8 @@ static int core_imc_mem_init(int cpu, int size) /* We need only vbase for core counters */ mem_info->vbase = page_address(alloc_pages_node(phys_id, - GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, - get_order(size))); + GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE | + __GFP_NOWARN, get_order(size))); if (!mem_info->vbase) return -ENOMEM; @@ -791,8 +791,8 @@ static int thread_imc_mem_alloc(int cpu_id, int size) * free the memory in cpu offline path. */ local_mem = page_address(alloc_pages_node(phys_id, - GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, - get_order(size))); + GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE | + __GFP_NOWARN, get_order(size))); if (!local_mem) return -ENOMEM; -- cgit v1.2.3 From 0d8ba16278ec30a262d931875018abee332f926f Mon Sep 17 00:00:00 2001 From: Anju T Sudhakar Date: Fri, 13 Oct 2017 11:29:41 +0530 Subject: powerpc/perf: Fix IMC initialization crash Panic observed with latest firmware, and upstream kernel: NIP init_imc_pmu+0x8c/0xcf0 LR init_imc_pmu+0x2f8/0xcf0 Call Trace: init_imc_pmu+0x2c8/0xcf0 (unreliable) opal_imc_counters_probe+0x300/0x400 platform_drv_probe+0x64/0x110 driver_probe_device+0x3d8/0x580 __driver_attach+0x14c/0x1a0 bus_for_each_dev+0x8c/0xf0 driver_attach+0x34/0x50 bus_add_driver+0x298/0x350 driver_register+0x9c/0x180 __platform_driver_register+0x5c/0x70 opal_imc_driver_init+0x2c/0x40 do_one_initcall+0x64/0x1d0 kernel_init_freeable+0x280/0x374 kernel_init+0x24/0x160 ret_from_kernel_thread+0x5c/0x74 While registering nest imc at init, cpu-hotplug callback nest_pmu_cpumask_init() makes an OPAL call to stop the engine. And if the OPAL call fails, imc_common_cpuhp_mem_free() is invoked to cleanup memory and cpuhotplug setup. But when cleaning up the attribute group, we are dereferencing the attribute element array without checking whether the backing element is not NULL. This causes the kernel panic. Add a check for the backing element prior to dereferencing the attribute element, to handle the failing case gracefully. Signed-off-by: Anju T Sudhakar Reported-by: Pridhiviraj Paidipeddi [mpe: Trim change log] Signed-off-by: Michael Ellerman --- arch/powerpc/perf/imc-pmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/perf') diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 39a0203fd8a5..88126245881b 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -1176,7 +1176,8 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) } /* Only free the attr_groups which are dynamically allocated */ - kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs); + if (pmu_ptr->attr_groups[IMC_EVENT_ATTR]) + kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs); kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]); kfree(pmu_ptr); return; -- cgit v1.2.3 From 0b167f11085a34281349f395d200052b61a7e221 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Thu, 19 Oct 2017 17:41:33 -0200 Subject: powerpc/perf: Fix IMC allocation routine When setting nr_cpus=1, we observed a crash in IMC code during boot due to a missing allocation: basically, IMC code is taking the number of threads into account in imc_mem_init() and if we manually set nr_cpus for a value that is not multiple of the number of threads per core, an integer division in that function will discard the decimal portion, leading IMC to not allocate one mem_info struct. This causes a NULL pointer dereference later, on is_core_imc_mem_inited(). This patch just rounds that division up, fixing the bug. Signed-off-by: Guilherme G. Piccoli Acked-by: Anju T Sudhakar Signed-off-by: Michael Ellerman --- arch/powerpc/perf/imc-pmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/perf') diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 88126245881b..92ae5de0bbac 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -1104,7 +1104,7 @@ static int init_nest_pmu_ref(void) static void cleanup_all_core_imc_memory(void) { - int i, nr_cores = num_present_cpus() / threads_per_core; + int i, nr_cores = DIV_ROUND_UP(num_present_cpus(), threads_per_core); struct imc_mem_info *ptr = core_imc_pmu->mem_info; int size = core_imc_pmu->counter_mem_size; @@ -1212,7 +1212,7 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent, if (!pmu_ptr->pmu.name) return -ENOMEM; - nr_cores = num_present_cpus() / threads_per_core; + nr_cores = DIV_ROUND_UP(num_present_cpus(), threads_per_core); pmu_ptr->mem_info = kcalloc(nr_cores, sizeof(struct imc_mem_info), GFP_KERNEL); -- cgit v1.2.3 From 7ecb37f62fe58e3e4d9b03443b92d213b2c108ce Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Thu, 2 Nov 2017 17:42:26 +0530 Subject: powerpc/perf: Fix core-imc hotplug callback failure during imc initialization Call trace observed during boot: nest_capp0_imc performance monitor hardware support registered nest_capp1_imc performance monitor hardware support registered core_imc memory allocation for cpu 56 failed Unable to handle kernel paging request for data at address 0xffa400010 Faulting instruction address: 0xc000000000bf3294 0:mon> e cpu 0x0: Vector: 300 (Data Access) at [c000000ff38ff8d0] pc: c000000000bf3294: mutex_lock+0x34/0x90 lr: c000000000bf3288: mutex_lock+0x28/0x90 sp: c000000ff38ffb50 msr: 9000000002009033 dar: ffa400010 dsisr: 80000 current = 0xc000000ff383de00 paca = 0xc000000007ae0000 softe: 0 irq_happened: 0x01 pid = 13, comm = cpuhp/0 Linux version 4.11.0-39.el7a.ppc64le (mockbuild@ppc-058.build.eng.bos.redhat.com) (gcc version 4.8.5 20150623 (Red Hat 4.8.5-16) (GCC) ) #1 SMP Tue Oct 3 07:42:44 EDT 2017 0:mon> t [c000000ff38ffb80] c0000000002ddfac perf_pmu_migrate_context+0xac/0x470 [c000000ff38ffc40] c00000000011385c ppc_core_imc_cpu_offline+0x1ac/0x1e0 [c000000ff38ffc90] c000000000125758 cpuhp_invoke_callback+0x198/0x5d0 [c000000ff38ffd00] c00000000012782c cpuhp_thread_fun+0x8c/0x3d0 [c000000ff38ffd60] c0000000001678d0 smpboot_thread_fn+0x290/0x2a0 [c000000ff38ffdc0] c00000000015ee78 kthread+0x168/0x1b0 [c000000ff38ffe30] c00000000000b368 ret_from_kernel_thread+0x5c/0x74 While registering the cpuhoplug callbacks for core-imc, if we fails in the cpuhotplug online path for any random core (either because opal call to initialize the core-imc counters fails or because memory allocation fails for that core), ppc_core_imc_cpu_offline() will get invoked for other cpus who successfully returned from cpuhotplug online path. But in the ppc_core_imc_cpu_offline() path we are trying to migrate the event context, when core-imc counters are not even initialized. Thus creating the above stack dump. Add a check to see if core-imc counters are enabled or not in the cpuhotplug offline path before migrating the context to handle this failing scenario. Fixes: 885dcd709ba9 ("powerpc/perf: Add nest IMC PMU support") Signed-off-by: Madhavan Srinivasan Signed-off-by: Anju T Sudhakar Signed-off-by: Michael Ellerman --- arch/powerpc/perf/imc-pmu.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch/powerpc/perf') diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 92ae5de0bbac..36344117c680 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -607,6 +607,20 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu) if (!cpumask_test_and_clear_cpu(cpu, &core_imc_cpumask)) return 0; + /* + * Check whether core_imc is registered. We could end up here + * if the cpuhotplug callback registration fails. i.e, callback + * invokes the offline path for all sucessfully registered cpus. + * At this stage, core_imc pmu will not be registered and we + * should return here. + * + * We return with a zero since this is not an offline failure. + * And cpuhp_setup_state() returns the actual failure reason + * to the caller, which inturn will call the cleanup routine. + */ + if (!core_imc_pmu->pmu.event_init) + return 0; + /* Find any online cpu in that core except the current "cpu" */ ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu); -- cgit v1.2.3