summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMark Rutland <mark.rutland@arm.com>2022-09-30 12:18:44 +0100
committerWill Deacon <will@kernel.org>2022-11-07 16:16:20 +0000
commitfe40ffdb7656d1f9c42dd402740765ff8b418b17 (patch)
treeefd1536a97f9758fd0f88dc74f8529cc58617f56
parent6349a2470d07561bc25e34681985c6ff9c807dfb (diff)
downloadlinux-fe40ffdb7656d1f9c42dd402740765ff8b418b17.tar.bz2
arm_pmu: rework ACPI probing
The current ACPI PMU probing logic tries to associate PMUs with CPUs when the CPU is first brought online, in order to handle late hotplug, though PMUs are only registered during early boot, and so for late hotplugged CPUs this can only associate the CPU with an existing PMU. We tried to be clever and the have the arm_pmu_acpi_cpu_starting() callback allocate a struct arm_pmu when no matching instance is found, in order to avoid duplication of logic. However, as above this doesn't do anything useful for late hotplugged CPUs, and this requires us to allocate memory in an atomic context, which is especially problematic for PREEMPT_RT, as reported by Valentin and Pierre. This patch reworks the probing to detect PMUs for all online CPUs in the arm_pmu_acpi_probe() function, which is more aligned with how DT probing works. The arm_pmu_acpi_cpu_starting() callback only tries to associate CPUs with an existing arm_pmu instance, avoiding the problem of allocating in atomic context. Note that as we didn't previously register PMUs for late-hotplugged CPUs, this change doesn't result in a loss of existing functionality, though we will now warn when we cannot associate a CPU with a PMU. This change allows us to pull the hotplug callback registration into the arm_pmu_acpi_probe() function, as we no longer need the callbacks to be invoked shortly after probing the boot CPUs, and can register it without invoking the calls. For the moment the arm_pmu_acpi_init() initcall remains to register the SPE PMU, though in future this should probably be moved elsewhere (e.g. the arm64 ACPI init code), since this doesn't need to be tied to the regular CPU PMU code. Signed-off-by: Mark Rutland <mark.rutland@arm.com> Reported-by: Valentin Schneider <valentin.schneider@arm.com> Link: https://lore.kernel.org/r/20210810134127.1394269-2-valentin.schneider@arm.com/ Reported-by: Pierre Gondois <pierre.gondois@arm.com> Link: https://lore.kernel.org/linux-arm-kernel/20220912155105.1443303-1-pierre.gondois@arm.com/ Cc: Pierre Gondois <pierre.gondois@arm.com> Cc: Valentin Schneider <vschneid@redhat.com> Cc: Will Deacon <will@kernel.org> Reviewed-and-tested-by: Pierre Gondois <pierre.gondois@arm.com> Link: https://lore.kernel.org/r/20220930111844.1522365-4-mark.rutland@arm.com Signed-off-by: Will Deacon <will@kernel.org>
-rw-r--r--drivers/perf/arm_pmu.c17
-rw-r--r--drivers/perf/arm_pmu_acpi.c95
-rw-r--r--include/linux/perf/arm_pmu.h1
3 files changed, 52 insertions, 61 deletions
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 3f07df5a7e95..82a6d22e8ee2 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -861,16 +861,16 @@ static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
&cpu_pmu->node);
}
-static struct arm_pmu *__armpmu_alloc(gfp_t flags)
+struct arm_pmu *armpmu_alloc(void)
{
struct arm_pmu *pmu;
int cpu;
- pmu = kzalloc(sizeof(*pmu), flags);
+ pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
if (!pmu)
goto out;
- pmu->hw_events = alloc_percpu_gfp(struct pmu_hw_events, flags);
+ pmu->hw_events = alloc_percpu_gfp(struct pmu_hw_events, GFP_KERNEL);
if (!pmu->hw_events) {
pr_info("failed to allocate per-cpu PMU data.\n");
goto out_free_pmu;
@@ -916,17 +916,6 @@ out:
return NULL;
}
-struct arm_pmu *armpmu_alloc(void)
-{
- return __armpmu_alloc(GFP_KERNEL);
-}
-
-struct arm_pmu *armpmu_alloc_atomic(void)
-{
- return __armpmu_alloc(GFP_ATOMIC);
-}
-
-
void armpmu_free(struct arm_pmu *pmu)
{
free_percpu(pmu->hw_events);
diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c
index 99abea3b2cc9..a085e45b509e 100644
--- a/drivers/perf/arm_pmu_acpi.c
+++ b/drivers/perf/arm_pmu_acpi.c
@@ -13,6 +13,7 @@
#include <linux/percpu.h>
#include <linux/perf/arm_pmu.h>
+#include <asm/cpu.h>
#include <asm/cputype.h>
static DEFINE_PER_CPU(struct arm_pmu *, probed_pmus);
@@ -204,26 +205,6 @@ static struct arm_pmu *arm_pmu_acpi_find_pmu(void)
return NULL;
}
-static struct arm_pmu *arm_pmu_acpi_find_alloc_pmu(void)
-{
- struct arm_pmu *pmu;
-
- pmu = arm_pmu_acpi_find_pmu();
- if (pmu)
- return pmu;
-
- pmu = armpmu_alloc_atomic();
- if (!pmu) {
- pr_warn("Unable to allocate PMU for CPU%d\n",
- smp_processor_id());
- return NULL;
- }
-
- pmu->acpi_cpuid = read_cpuid_id();
-
- return pmu;
-}
-
/*
* Check whether the new IRQ is compatible with those already associated with
* the PMU (e.g. we don't have mismatched PPIs).
@@ -286,26 +267,45 @@ static int arm_pmu_acpi_cpu_starting(unsigned int cpu)
if (per_cpu(probed_pmus, cpu))
return 0;
- pmu = arm_pmu_acpi_find_alloc_pmu();
- if (!pmu)
- return -ENOMEM;
+ pmu = arm_pmu_acpi_find_pmu();
+ if (!pmu) {
+ pr_warn_ratelimited("Unable to associate CPU%d with a PMU\n",
+ cpu);
+ return 0;
+ }
arm_pmu_acpi_associate_pmu_cpu(pmu, cpu);
-
- /*
- * Ideally, we'd probe the PMU here when we find the first matching
- * CPU. We can't do that for several reasons; see the comment in
- * arm_pmu_acpi_init().
- *
- * So for the time being, we're done.
- */
return 0;
}
+static void arm_pmu_acpi_probe_matching_cpus(struct arm_pmu *pmu,
+ unsigned long cpuid)
+{
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ unsigned long cpu_cpuid = per_cpu(cpu_data, cpu).reg_midr;
+
+ if (cpu_cpuid == cpuid)
+ arm_pmu_acpi_associate_pmu_cpu(pmu, cpu);
+ }
+}
+
int arm_pmu_acpi_probe(armpmu_init_fn init_fn)
{
int pmu_idx = 0;
- int cpu, ret;
+ unsigned int cpu;
+ int ret;
+
+ ret = arm_pmu_acpi_parse_irqs();
+ if (ret)
+ return ret;
+
+ ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_ACPI_STARTING,
+ "perf/arm/pmu_acpi:starting",
+ arm_pmu_acpi_cpu_starting, NULL);
+ if (ret)
+ return ret;
/*
* Initialise and register the set of PMUs which we know about right
@@ -320,13 +320,26 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn)
* For the moment, as with the platform/DT case, we need at least one
* of a PMU's CPUs to be online at probe time.
*/
- for_each_possible_cpu(cpu) {
+ for_each_online_cpu(cpu) {
struct arm_pmu *pmu = per_cpu(probed_pmus, cpu);
+ unsigned long cpuid;
char *base_name;
- if (!pmu || pmu->name)
+ /* If we've already probed this CPU, we have nothing to do */
+ if (pmu)
continue;
+ pmu = armpmu_alloc();
+ if (!pmu) {
+ pr_warn("Unable to allocate PMU for CPU%d\n",
+ cpu);
+ }
+
+ cpuid = per_cpu(cpu_data, cpu).reg_midr;
+ pmu->acpi_cpuid = cpuid;
+
+ arm_pmu_acpi_probe_matching_cpus(pmu, cpuid);
+
ret = init_fn(pmu);
if (ret == -ENODEV) {
/* PMU not handled by this driver, or not present */
@@ -351,26 +364,16 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn)
}
}
- return 0;
+ return ret;
}
static int arm_pmu_acpi_init(void)
{
- int ret;
-
if (acpi_disabled)
return 0;
arm_spe_acpi_register_device();
- ret = arm_pmu_acpi_parse_irqs();
- if (ret)
- return ret;
-
- ret = cpuhp_setup_state(CPUHP_AP_PERF_ARM_ACPI_STARTING,
- "perf/arm/pmu_acpi:starting",
- arm_pmu_acpi_cpu_starting, NULL);
-
- return ret;
+ return 0;
}
subsys_initcall(arm_pmu_acpi_init)
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 0356cb6a215d..0c15c5b7f801 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -174,7 +174,6 @@ void kvm_host_pmu_init(struct arm_pmu *pmu);
/* Internal functions only for core arm_pmu code */
struct arm_pmu *armpmu_alloc(void);
-struct arm_pmu *armpmu_alloc_atomic(void);
void armpmu_free(struct arm_pmu *pmu);
int armpmu_register(struct arm_pmu *pmu);
int armpmu_request_irq(int irq, int cpu);