From 6251d38059ae22304ede4f3748af9f795bdbf4fd Mon Sep 17 00:00:00 2001
From: Besar Wicaksono <bwicaksono@nvidia.com>
Date: Wed, 28 Sep 2022 19:28:34 -0500
Subject: ACPI: ARM Performance Monitoring Unit Table (APMT) initial support

ARM Performance Monitoring Unit Table describes the properties of PMU
support in ARM-based system. The APMT table contains a list of nodes,
each represents a PMU in the system that conforms to ARM CoreSight PMU
architecture. The properties of each node include information required
to access the PMU (e.g. MMIO base address, interrupt number) and also
identification. For more detailed information, please refer to the
specification below:
 * APMT: https://developer.arm.com/documentation/den0117/latest
 * ARM Coresight PMU:
        https://developer.arm.com/documentation/ihi0091/latest

The initial support adds the detection of APMT table and generic
infrastructure to create platform devices for ARM CoreSight PMUs.
Similar to IORT the root pointer of APMT is preserved during runtime
and each PMU platform device is given a pointer to the corresponding
APMT node.

Signed-off-by: Besar Wicaksono <bwicaksono@nvidia.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Link: https://lore.kernel.org/r/20220929002834.32664-1-bwicaksono@nvidia.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/acpi/arm64/Kconfig  |   3 +
 drivers/acpi/arm64/Makefile |   1 +
 drivers/acpi/arm64/apmt.c   | 177 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/acpi/bus.c          |   2 +
 4 files changed, 183 insertions(+)
 create mode 100644 drivers/acpi/arm64/apmt.c

(limited to 'drivers')

diff --git a/drivers/acpi/arm64/Kconfig b/drivers/acpi/arm64/Kconfig
index d4a72835f328..b3ed6212244c 100644
--- a/drivers/acpi/arm64/Kconfig
+++ b/drivers/acpi/arm64/Kconfig
@@ -18,3 +18,6 @@ config ACPI_AGDI
 	  reset command.
 
 	  If set, the kernel parses AGDI table and listens for the command.
+
+config ACPI_APMT
+	bool
diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile
index 7b9e4045659d..e21a9e84e394 100644
--- a/drivers/acpi/arm64/Makefile
+++ b/drivers/acpi/arm64/Makefile
@@ -2,4 +2,5 @@
 obj-$(CONFIG_ACPI_AGDI) 	+= agdi.o
 obj-$(CONFIG_ACPI_IORT) 	+= iort.o
 obj-$(CONFIG_ACPI_GTDT) 	+= gtdt.o
+obj-$(CONFIG_ACPI_APMT) 	+= apmt.o
 obj-y				+= dma.o
diff --git a/drivers/acpi/arm64/apmt.c b/drivers/acpi/arm64/apmt.c
new file mode 100644
index 000000000000..f55167ca51e7
--- /dev/null
+++ b/drivers/acpi/arm64/apmt.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM APMT table support.
+ * Design document number: ARM DEN0117.
+ *
+ * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES.
+ *
+ */
+
+#define pr_fmt(fmt)	"ACPI: APMT: " fmt
+
+#include <linux/acpi.h>
+#include <linux/acpi_apmt.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+
+#define DEV_NAME "arm-cs-arch-pmu"
+
+/* There can be up to 3 resources: page 0 and 1 address, and interrupt. */
+#define DEV_MAX_RESOURCE_COUNT 3
+
+/* Root pointer to the mapped APMT table */
+static struct acpi_table_header *apmt_table;
+
+static int __init apmt_init_resources(struct resource *res,
+					      struct acpi_apmt_node *node)
+{
+	int irq, trigger;
+	int num_res = 0;
+
+	res[num_res].start = node->base_address0;
+	res[num_res].end = node->base_address0 + SZ_4K - 1;
+	res[num_res].flags = IORESOURCE_MEM;
+
+	num_res++;
+
+	res[num_res].start = node->base_address1;
+	res[num_res].end = node->base_address1 + SZ_4K - 1;
+	res[num_res].flags = IORESOURCE_MEM;
+
+	num_res++;
+
+	if (node->ovflw_irq != 0) {
+		trigger = (node->ovflw_irq_flags & ACPI_APMT_OVFLW_IRQ_FLAGS_MODE);
+		trigger = (trigger == ACPI_APMT_OVFLW_IRQ_FLAGS_MODE_LEVEL) ?
+			ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE;
+		irq = acpi_register_gsi(NULL, node->ovflw_irq, trigger,
+						ACPI_ACTIVE_HIGH);
+
+		if (irq <= 0) {
+			pr_warn("APMT could not register gsi hwirq %d\n", irq);
+			return num_res;
+		}
+
+		res[num_res].start = irq;
+		res[num_res].end = irq;
+		res[num_res].flags = IORESOURCE_IRQ;
+
+		num_res++;
+	}
+
+	return num_res;
+}
+
+/**
+ * apmt_add_platform_device() - Allocate a platform device for APMT node
+ * @node: Pointer to device ACPI APMT node
+ *
+ * Returns: 0 on success, <0 failure
+ */
+static int __init apmt_add_platform_device(struct acpi_apmt_node *node,
+							struct fwnode_handle *fwnode)
+{
+	struct platform_device *pdev;
+	int ret, count;
+	struct resource res[DEV_MAX_RESOURCE_COUNT];
+
+	pdev = platform_device_alloc(DEV_NAME, PLATFORM_DEVID_AUTO);
+	if (!pdev)
+		return -ENOMEM;
+
+	memset(res, 0, sizeof(res));
+
+	count = apmt_init_resources(res, node);
+
+	ret = platform_device_add_resources(pdev, res, count);
+	if (ret)
+		goto dev_put;
+
+	/*
+	 * Add a copy of APMT node pointer to platform_data to be used to
+	 * retrieve APMT data information.
+	 */
+	ret = platform_device_add_data(pdev, &node, sizeof(node));
+	if (ret)
+		goto dev_put;
+
+	pdev->dev.fwnode = fwnode;
+
+	ret = platform_device_add(pdev);
+
+	if (ret)
+		goto dev_put;
+
+	return 0;
+
+dev_put:
+	platform_device_put(pdev);
+
+	return ret;
+}
+
+static int __init apmt_init_platform_devices(void)
+{
+	struct acpi_apmt_node *apmt_node;
+	struct acpi_table_apmt *apmt;
+	struct fwnode_handle *fwnode;
+	u64 offset, end;
+	int ret;
+
+	/*
+	 * apmt_table and apmt both point to the start of APMT table, but
+	 * have different struct types
+	 */
+	apmt = (struct acpi_table_apmt *)apmt_table;
+	offset = sizeof(*apmt);
+	end = apmt->header.length;
+
+	while (offset < end) {
+		apmt_node = ACPI_ADD_PTR(struct acpi_apmt_node, apmt,
+				 offset);
+
+		fwnode = acpi_alloc_fwnode_static();
+		if (!fwnode)
+			return -ENOMEM;
+
+		ret = apmt_add_platform_device(apmt_node, fwnode);
+		if (ret) {
+			acpi_free_fwnode_static(fwnode);
+			return ret;
+		}
+
+		offset += apmt_node->length;
+	}
+
+	return 0;
+}
+
+void __init acpi_apmt_init(void)
+{
+	acpi_status status;
+	int ret;
+
+	/**
+	 * APMT table nodes will be used at runtime after the apmt init,
+	 * so we don't need to call acpi_put_table() to release
+	 * the APMT table mapping.
+	 */
+	status = acpi_get_table(ACPI_SIG_APMT, 0, &apmt_table);
+
+	if (ACPI_FAILURE(status)) {
+		if (status != AE_NOT_FOUND) {
+			const char *msg = acpi_format_exception(status);
+
+			pr_err("Failed to get APMT table, %s\n", msg);
+		}
+
+		return;
+	}
+
+	ret = apmt_init_platform_devices();
+	if (ret) {
+		pr_err("Failed to initialize APMT platform devices, ret: %d\n", ret);
+		acpi_put_table(apmt_table);
+	}
+}
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index d466c8195314..351208eda9be 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -27,6 +27,7 @@
 #include <linux/dmi.h>
 #endif
 #include <linux/acpi_agdi.h>
+#include <linux/acpi_apmt.h>
 #include <linux/acpi_iort.h>
 #include <linux/acpi_viot.h>
 #include <linux/pci.h>
@@ -1423,6 +1424,7 @@ static int __init acpi_init(void)
 	acpi_setup_sb_notify_handler();
 	acpi_viot_init();
 	acpi_agdi_init();
+	acpi_apmt_init();
 	return 0;
 }
 
-- 
cgit v1.2.3


From 05da178ce0aa152f0592e10fa874054187f3621b Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Wed, 28 Sep 2022 20:21:26 +0100
Subject: ACPI/IORT: Update SMMUv3 DeviceID support

IORT E.e now allows SMMUv3 nodes to describe the DeviceID for MSIs
independently of wired GSIVs, where the previous oddly-restrictive
definition meant that an SMMU without PRI support had to provide a
DeviceID even if it didn't support MSIs either. Support this, with
the usual temporary flag definition while the real one is making
its way through ACPICA.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
Link: https://lore.kernel.org/r/4b3e2ead4f392d1a47a7528da119d57918e5d806.1664392886.git.robin.murphy@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/acpi/arm64/iort.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 8059baf4ef27..38fb84974f35 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -402,6 +402,10 @@ static struct acpi_iort_node *iort_node_get_id(struct acpi_iort_node *node,
 	return NULL;
 }
 
+#ifndef ACPI_IORT_SMMU_V3_DEVICEID_VALID
+#define ACPI_IORT_SMMU_V3_DEVICEID_VALID (1 << 4)
+#endif
+
 static int iort_get_id_mapping_index(struct acpi_iort_node *node)
 {
 	struct acpi_iort_smmu_v3 *smmu;
@@ -418,12 +422,16 @@ static int iort_get_id_mapping_index(struct acpi_iort_node *node)
 
 		smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
 		/*
-		 * ID mapping index is only ignored if all interrupts are
-		 * GSIV based
+		 * Until IORT E.e (node rev. 5), the ID mapping index was
+		 * defined to be valid unless all interrupts are GSIV-based.
 		 */
-		if (smmu->event_gsiv && smmu->pri_gsiv && smmu->gerr_gsiv
-		    && smmu->sync_gsiv)
+		if (node->revision < 5) {
+			if (smmu->event_gsiv && smmu->pri_gsiv &&
+			    smmu->gerr_gsiv && smmu->sync_gsiv)
+				return -EINVAL;
+		} else if (!(smmu->flags & ACPI_IORT_SMMU_V3_DEVICEID_VALID)) {
 			return -EINVAL;
+		}
 
 		if (smmu->id_mapping_index >= node->mapping_count) {
 			pr_err(FW_BUG "[node %p type %d] ID mapping index overflows valid mappings\n",
-- 
cgit v1.2.3


From ad51b5043bb39e0d715e8ad910fa5ac899ebc70b Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 30 Sep 2022 12:18:42 +0100
Subject: arm_pmu: acpi: factor out PMU<->CPU association

A subsequent patch will rework the ACPI probing of PMUs, and we'll need
to associate a CPU with a PMU in two separate paths.

Factor out the association logic into a helper function so that it can
be reused.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Pierre Gondois <pierre.gondois@arm.com>
Cc: Will Deacon <will@kernel.org>
Reviewed-and-tested-by: Pierre Gondois <pierre.gondois@arm.com>
Link: https://lore.kernel.org/r/20220930111844.1522365-2-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_pmu_acpi.c | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

(limited to 'drivers')

diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c
index 96ffadd654ff..a52a4aafd629 100644
--- a/drivers/perf/arm_pmu_acpi.c
+++ b/drivers/perf/arm_pmu_acpi.c
@@ -242,6 +242,22 @@ static bool pmu_irq_matches(struct arm_pmu *pmu, int irq)
 	return true;
 }
 
+static void arm_pmu_acpi_associate_pmu_cpu(struct arm_pmu *pmu,
+					   unsigned int cpu)
+{
+	int irq = per_cpu(pmu_irqs, cpu);
+
+	per_cpu(probed_pmus, cpu) = pmu;
+
+	if (pmu_irq_matches(pmu, irq)) {
+		struct pmu_hw_events __percpu *hw_events;
+		hw_events = pmu->hw_events;
+		per_cpu(hw_events->irq, cpu) = irq;
+	}
+
+	cpumask_set_cpu(cpu, &pmu->supported_cpus);
+}
+
 /*
  * This must run before the common arm_pmu hotplug logic, so that we can
  * associate a CPU and its interrupt before the common code tries to manage the
@@ -254,27 +270,16 @@ static bool pmu_irq_matches(struct arm_pmu *pmu, int irq)
 static int arm_pmu_acpi_cpu_starting(unsigned int cpu)
 {
 	struct arm_pmu *pmu;
-	struct pmu_hw_events __percpu *hw_events;
-	int irq;
 
 	/* If we've already probed this CPU, we have nothing to do */
 	if (per_cpu(probed_pmus, cpu))
 		return 0;
 
-	irq = per_cpu(pmu_irqs, cpu);
-
 	pmu = arm_pmu_acpi_find_alloc_pmu();
 	if (!pmu)
 		return -ENOMEM;
 
-	per_cpu(probed_pmus, cpu) = pmu;
-
-	if (pmu_irq_matches(pmu, irq)) {
-		hw_events = pmu->hw_events;
-		per_cpu(hw_events->irq, cpu) = irq;
-	}
-
-	cpumask_set_cpu(cpu, &pmu->supported_cpus);
+	arm_pmu_acpi_associate_pmu_cpu(pmu, cpu);
 
 	/*
 	 * Ideally, we'd probe the PMU here when we find the first matching
-- 
cgit v1.2.3


From 6349a2470d07561bc25e34681985c6ff9c807dfb Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 30 Sep 2022 12:18:43 +0100
Subject: arm_pmu: factor out PMU matching

A subsequent patch will rework the ACPI probing of PMUs, and we'll need
to match a CPU with a known cpuid in two separate paths.

Factor out the matching logic into a helper function so that it can be
reused.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Pierre Gondois <pierre.gondois@arm.com>
Cc: Will Deacon <will@kernel.org>
Reviewed-and-tested-by: Pierre Gondois <pierre.gondois@arm.com>
Link: https://lore.kernel.org/r/20220930111844.1522365-3-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_pmu_acpi.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c
index a52a4aafd629..99abea3b2cc9 100644
--- a/drivers/perf/arm_pmu_acpi.c
+++ b/drivers/perf/arm_pmu_acpi.c
@@ -187,7 +187,7 @@ out_err:
 	return err;
 }
 
-static struct arm_pmu *arm_pmu_acpi_find_alloc_pmu(void)
+static struct arm_pmu *arm_pmu_acpi_find_pmu(void)
 {
 	unsigned long cpuid = read_cpuid_id();
 	struct arm_pmu *pmu;
@@ -201,6 +201,17 @@ static struct arm_pmu *arm_pmu_acpi_find_alloc_pmu(void)
 		return pmu;
 	}
 
+	return NULL;
+}
+
+static struct arm_pmu *arm_pmu_acpi_find_alloc_pmu(void)
+{
+	struct arm_pmu *pmu;
+
+	pmu = arm_pmu_acpi_find_pmu();
+	if (pmu)
+		return pmu;
+
 	pmu = armpmu_alloc_atomic();
 	if (!pmu) {
 		pr_warn("Unable to allocate PMU for CPU%d\n",
@@ -208,7 +219,7 @@ static struct arm_pmu *arm_pmu_acpi_find_alloc_pmu(void)
 		return NULL;
 	}
 
-	pmu->acpi_cpuid = cpuid;
+	pmu->acpi_cpuid = read_cpuid_id();
 
 	return pmu;
 }
-- 
cgit v1.2.3


From fe40ffdb7656d1f9c42dd402740765ff8b418b17 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 30 Sep 2022 12:18:44 +0100
Subject: arm_pmu: rework ACPI probing

The current ACPI PMU probing logic tries to associate PMUs with CPUs
when the CPU is first brought online, in order to handle late hotplug,
though PMUs are only registered during early boot, and so for late
hotplugged CPUs this can only associate the CPU with an existing PMU.

We tried to be clever and the have the arm_pmu_acpi_cpu_starting()
callback allocate a struct arm_pmu when no matching instance is found,
in order to avoid duplication of logic. However, as above this doesn't
do anything useful for late hotplugged CPUs, and this requires us to
allocate memory in an atomic context, which is especially problematic
for PREEMPT_RT, as reported by Valentin and Pierre.

This patch reworks the probing to detect PMUs for all online CPUs in the
arm_pmu_acpi_probe() function, which is more aligned with how DT probing
works. The arm_pmu_acpi_cpu_starting() callback only tries to associate
CPUs with an existing arm_pmu instance, avoiding the problem of
allocating in atomic context.

Note that as we didn't previously register PMUs for late-hotplugged
CPUs, this change doesn't result in a loss of existing functionality,
though we will now warn when we cannot associate a CPU with a PMU.

This change allows us to pull the hotplug callback registration into the
arm_pmu_acpi_probe() function, as we no longer need the callbacks to be
invoked shortly after probing the boot CPUs, and can register it without
invoking the calls.

For the moment the arm_pmu_acpi_init() initcall remains to register the
SPE PMU, though in future this should probably be moved elsewhere (e.g.
the arm64 ACPI init code), since this doesn't need to be tied to the
regular CPU PMU code.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reported-by: Valentin Schneider <valentin.schneider@arm.com>
Link: https://lore.kernel.org/r/20210810134127.1394269-2-valentin.schneider@arm.com/
Reported-by: Pierre Gondois <pierre.gondois@arm.com>
Link: https://lore.kernel.org/linux-arm-kernel/20220912155105.1443303-1-pierre.gondois@arm.com/
Cc: Pierre Gondois <pierre.gondois@arm.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: Will Deacon <will@kernel.org>
Reviewed-and-tested-by: Pierre Gondois <pierre.gondois@arm.com>
Link: https://lore.kernel.org/r/20220930111844.1522365-4-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_pmu.c       | 17 ++------
 drivers/perf/arm_pmu_acpi.c  | 95 +++++++++++++++++++++++---------------------
 include/linux/perf/arm_pmu.h |  1 -
 3 files changed, 52 insertions(+), 61 deletions(-)

(limited to 'drivers')

diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 3f07df5a7e95..82a6d22e8ee2 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -861,16 +861,16 @@ static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
 					    &cpu_pmu->node);
 }
 
-static struct arm_pmu *__armpmu_alloc(gfp_t flags)
+struct arm_pmu *armpmu_alloc(void)
 {
 	struct arm_pmu *pmu;
 	int cpu;
 
-	pmu = kzalloc(sizeof(*pmu), flags);
+	pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
 	if (!pmu)
 		goto out;
 
-	pmu->hw_events = alloc_percpu_gfp(struct pmu_hw_events, flags);
+	pmu->hw_events = alloc_percpu_gfp(struct pmu_hw_events, GFP_KERNEL);
 	if (!pmu->hw_events) {
 		pr_info("failed to allocate per-cpu PMU data.\n");
 		goto out_free_pmu;
@@ -916,17 +916,6 @@ out:
 	return NULL;
 }
 
-struct arm_pmu *armpmu_alloc(void)
-{
-	return __armpmu_alloc(GFP_KERNEL);
-}
-
-struct arm_pmu *armpmu_alloc_atomic(void)
-{
-	return __armpmu_alloc(GFP_ATOMIC);
-}
-
-
 void armpmu_free(struct arm_pmu *pmu)
 {
 	free_percpu(pmu->hw_events);
diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c
index 99abea3b2cc9..a085e45b509e 100644
--- a/drivers/perf/arm_pmu_acpi.c
+++ b/drivers/perf/arm_pmu_acpi.c
@@ -13,6 +13,7 @@
 #include <linux/percpu.h>
 #include <linux/perf/arm_pmu.h>
 
+#include <asm/cpu.h>
 #include <asm/cputype.h>
 
 static DEFINE_PER_CPU(struct arm_pmu *, probed_pmus);
@@ -204,26 +205,6 @@ static struct arm_pmu *arm_pmu_acpi_find_pmu(void)
 	return NULL;
 }
 
-static struct arm_pmu *arm_pmu_acpi_find_alloc_pmu(void)
-{
-	struct arm_pmu *pmu;
-
-	pmu = arm_pmu_acpi_find_pmu();
-	if (pmu)
-		return pmu;
-
-	pmu = armpmu_alloc_atomic();
-	if (!pmu) {
-		pr_warn("Unable to allocate PMU for CPU%d\n",
-			smp_processor_id());
-		return NULL;
-	}
-
-	pmu->acpi_cpuid = read_cpuid_id();
-
-	return pmu;
-}
-
 /*
  * Check whether the new IRQ is compatible with those already associated with
  * the PMU (e.g. we don't have mismatched PPIs).
@@ -286,26 +267,45 @@ static int arm_pmu_acpi_cpu_starting(unsigned int cpu)
 	if (per_cpu(probed_pmus, cpu))
 		return 0;
 
-	pmu = arm_pmu_acpi_find_alloc_pmu();
-	if (!pmu)
-		return -ENOMEM;
+	pmu = arm_pmu_acpi_find_pmu();
+	if (!pmu) {
+		pr_warn_ratelimited("Unable to associate CPU%d with a PMU\n",
+				    cpu);
+		return 0;
+	}
 
 	arm_pmu_acpi_associate_pmu_cpu(pmu, cpu);
-
-	/*
-	 * Ideally, we'd probe the PMU here when we find the first matching
-	 * CPU. We can't do that for several reasons; see the comment in
-	 * arm_pmu_acpi_init().
-	 *
-	 * So for the time being, we're done.
-	 */
 	return 0;
 }
 
+static void arm_pmu_acpi_probe_matching_cpus(struct arm_pmu *pmu,
+					     unsigned long cpuid)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		unsigned long cpu_cpuid = per_cpu(cpu_data, cpu).reg_midr;
+
+		if (cpu_cpuid == cpuid)
+			arm_pmu_acpi_associate_pmu_cpu(pmu, cpu);
+	}
+}
+
 int arm_pmu_acpi_probe(armpmu_init_fn init_fn)
 {
 	int pmu_idx = 0;
-	int cpu, ret;
+	unsigned int cpu;
+	int ret;
+
+	ret = arm_pmu_acpi_parse_irqs();
+	if (ret)
+		return ret;
+
+	ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_ACPI_STARTING,
+					"perf/arm/pmu_acpi:starting",
+					arm_pmu_acpi_cpu_starting, NULL);
+	if (ret)
+		return ret;
 
 	/*
 	 * Initialise and register the set of PMUs which we know about right
@@ -320,13 +320,26 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn)
 	 * For the moment, as with the platform/DT case, we need at least one
 	 * of a PMU's CPUs to be online at probe time.
 	 */
-	for_each_possible_cpu(cpu) {
+	for_each_online_cpu(cpu) {
 		struct arm_pmu *pmu = per_cpu(probed_pmus, cpu);
+		unsigned long cpuid;
 		char *base_name;
 
-		if (!pmu || pmu->name)
+		/* If we've already probed this CPU, we have nothing to do */
+		if (pmu)
 			continue;
 
+		pmu = armpmu_alloc();
+		if (!pmu) {
+			pr_warn("Unable to allocate PMU for CPU%d\n",
+				cpu);
+		}
+
+		cpuid = per_cpu(cpu_data, cpu).reg_midr;
+		pmu->acpi_cpuid = cpuid;
+
+		arm_pmu_acpi_probe_matching_cpus(pmu, cpuid);
+
 		ret = init_fn(pmu);
 		if (ret == -ENODEV) {
 			/* PMU not handled by this driver, or not present */
@@ -351,26 +364,16 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn)
 		}
 	}
 
-	return 0;
+	return ret;
 }
 
 static int arm_pmu_acpi_init(void)
 {
-	int ret;
-
 	if (acpi_disabled)
 		return 0;
 
 	arm_spe_acpi_register_device();
 
-	ret = arm_pmu_acpi_parse_irqs();
-	if (ret)
-		return ret;
-
-	ret = cpuhp_setup_state(CPUHP_AP_PERF_ARM_ACPI_STARTING,
-				"perf/arm/pmu_acpi:starting",
-				arm_pmu_acpi_cpu_starting, NULL);
-
-	return ret;
+	return 0;
 }
 subsys_initcall(arm_pmu_acpi_init)
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 0356cb6a215d..0c15c5b7f801 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -174,7 +174,6 @@ void kvm_host_pmu_init(struct arm_pmu *pmu);
 
 /* Internal functions only for core arm_pmu code */
 struct arm_pmu *armpmu_alloc(void);
-struct arm_pmu *armpmu_alloc_atomic(void);
 void armpmu_free(struct arm_pmu *pmu);
 int armpmu_register(struct arm_pmu *pmu);
 int armpmu_request_irq(int irq, int cpu);
-- 
cgit v1.2.3


From a8731264e5ce083eb761e5a6a8273db536743e9e Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 8 Nov 2022 09:37:25 +0000
Subject: arm_pmu: acpi: handle allocation failure

One of the failure paths in the arm_pmu ACPI code is missing an early
return, permitting a NULL pointer dereference upon a memory allocation
failure.

Add the missing return.

Fixes: fe40ffdb7656 ("arm_pmu: rework ACPI probing")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reported-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20221108093725.1239563-1-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_pmu_acpi.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers')

diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c
index a085e45b509e..90815ad762eb 100644
--- a/drivers/perf/arm_pmu_acpi.c
+++ b/drivers/perf/arm_pmu_acpi.c
@@ -333,6 +333,7 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn)
 		if (!pmu) {
 			pr_warn("Unable to allocate PMU for CPU%d\n",
 				cpu);
+			return -ENOMEM;
 		}
 
 		cpuid = per_cpu(cpu_data, cpu).reg_midr;
-- 
cgit v1.2.3


From 68c76ad4a9571a2b603665c85cf8229bcf04982a Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Thu, 27 Oct 2022 17:59:06 +0200
Subject: arm64: unwind: add asynchronous unwind tables to kernel and modules

Enable asynchronous unwind table generation for both the core kernel as
well as modules, and emit the resulting .eh_frame sections as init code
so we can use the unwind directives for code patching at boot or module
load time.

This will be used by dynamic shadow call stack support, which will rely
on code patching rather than compiler codegen to emit the shadow call
stack push and pop instructions.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
Tested-by: Sami Tolvanen <samitolvanen@google.com>
Link: https://lore.kernel.org/r/20221027155908.1940624-2-ardb@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/Kconfig                    |  3 +++
 arch/arm64/Makefile                   |  5 +++++
 arch/arm64/include/asm/module.lds.h   |  8 ++++++++
 arch/arm64/kernel/pi/Makefile         |  1 +
 arch/arm64/kernel/vmlinux.lds.S       | 13 +++++++++++++
 arch/arm64/kvm/hyp/nvhe/Makefile      |  1 +
 drivers/firmware/efi/libstub/Makefile |  1 +
 include/asm-generic/vmlinux.lds.h     |  9 +++++++--
 scripts/module.lds.S                  |  6 ++++++
 9 files changed, 45 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 505c8a1ccbe0..7e3a9cf2193d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -370,6 +370,9 @@ config KASAN_SHADOW_OFFSET
 	default 0xeffffff800000000 if ARM64_VA_BITS_36 && KASAN_SW_TAGS
 	default 0xffffffffffffffff
 
+config UNWIND_TABLES
+	bool
+
 source "arch/arm64/Kconfig.platforms"
 
 menu "Kernel Features"
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 5e56d26a2239..7868a176993f 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -45,8 +45,13 @@ KBUILD_CFLAGS	+= $(call cc-option,-mabi=lp64)
 KBUILD_AFLAGS	+= $(call cc-option,-mabi=lp64)
 
 # Avoid generating .eh_frame* sections.
+ifneq ($(CONFIG_UNWIND_TABLES),y)
 KBUILD_CFLAGS	+= -fno-asynchronous-unwind-tables -fno-unwind-tables
 KBUILD_AFLAGS	+= -fno-asynchronous-unwind-tables -fno-unwind-tables
+else
+KBUILD_CFLAGS	+= -fasynchronous-unwind-tables
+KBUILD_AFLAGS	+= -fasynchronous-unwind-tables
+endif
 
 ifeq ($(CONFIG_STACKPROTECTOR_PER_TASK),y)
 prepare: stack_protector_prepare
diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h
index 094701ec5500..dbba4b7559aa 100644
--- a/arch/arm64/include/asm/module.lds.h
+++ b/arch/arm64/include/asm/module.lds.h
@@ -17,4 +17,12 @@ SECTIONS {
 	 */
 	.text.hot : { *(.text.hot) }
 #endif
+
+#ifdef CONFIG_UNWIND_TABLES
+	/*
+	 * Currently, we only use unwind info at module load time, so we can
+	 * put it into the .init allocation.
+	 */
+	.init.eh_frame : { *(.eh_frame) }
+#endif
 }
diff --git a/arch/arm64/kernel/pi/Makefile b/arch/arm64/kernel/pi/Makefile
index 839291430cb3..4c0ea3cd4ea4 100644
--- a/arch/arm64/kernel/pi/Makefile
+++ b/arch/arm64/kernel/pi/Makefile
@@ -7,6 +7,7 @@ KBUILD_CFLAGS	:= $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \
 		   -I$(srctree)/scripts/dtc/libfdt -fno-stack-protector \
 		   -include $(srctree)/include/linux/hidden.h \
 		   -D__DISABLE_EXPORTS -ffreestanding -D__NO_FORTIFY \
+		   -fno-asynchronous-unwind-tables -fno-unwind-tables \
 		   $(call cc-option,-fno-addrsig)
 
 # remove SCS flags from all objects in this directory
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 45131e354e27..4c13dafc98b8 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -121,6 +121,17 @@ jiffies = jiffies_64;
 #define TRAMP_TEXT
 #endif
 
+#ifdef CONFIG_UNWIND_TABLES
+#define UNWIND_DATA_SECTIONS				\
+	.eh_frame : {					\
+		__eh_frame_start = .;			\
+		*(.eh_frame)				\
+		__eh_frame_end = .;			\
+	}
+#else
+#define UNWIND_DATA_SECTIONS
+#endif
+
 /*
  * The size of the PE/COFF section that covers the kernel image, which
  * runs from _stext to _edata, must be a round multiple of the PE/COFF
@@ -231,6 +242,8 @@ SECTIONS
 		__alt_instructions_end = .;
 	}
 
+	UNWIND_DATA_SECTIONS
+
 	. = ALIGN(SEGMENT_ALIGN);
 	__inittext_end = .;
 	__initdata_begin = .;
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index be0a2bc3e20d..530347cdebe3 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -96,6 +96,7 @@ KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) $(CC_FLAGS_CFI)
 # when profile optimization is applied. gen-hyprel does not support SHT_REL and
 # causes a build failure. Remove profile optimization flags.
 KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%, $(KBUILD_CFLAGS))
+KBUILD_CFLAGS += -fno-asynchronous-unwind-tables -fno-unwind-tables
 
 # KVM nVHE code is run at a different exception code with a different map, so
 # compiler instrumentation that inserts callbacks or checks into the code may
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index b1601aad7e1a..1016f0b5311d 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -20,6 +20,7 @@ cflags-$(CONFIG_X86)		+= -m$(BITS) -D__KERNEL__ \
 # disable the stackleak plugin
 cflags-$(CONFIG_ARM64)		:= $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \
 				   -fpie $(DISABLE_STACKLEAK_PLUGIN) \
+				   -fno-unwind-tables -fno-asynchronous-unwind-tables \
 				   $(call cc-option,-mbranch-protection=none)
 cflags-$(CONFIG_ARM)		:= $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \
 				   -fno-builtin -fpic \
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index d06ada2341cb..0cca179e5106 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -1027,14 +1027,19 @@
  * keep any .init_array.* sections.
  * https://bugs.llvm.org/show_bug.cgi?id=46478
  */
+#ifdef CONFIG_UNWIND_TABLES
+#define DISCARD_EH_FRAME
+#else
+#define DISCARD_EH_FRAME	*(.eh_frame)
+#endif
 #if defined(CONFIG_GCOV_KERNEL) || defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KCSAN)
 # ifdef CONFIG_CONSTRUCTORS
 #  define SANITIZER_DISCARDS						\
-	*(.eh_frame)
+	DISCARD_EH_FRAME
 # else
 #  define SANITIZER_DISCARDS						\
 	*(.init_array) *(.init_array.*)					\
-	*(.eh_frame)
+	DISCARD_EH_FRAME
 # endif
 #else
 # define SANITIZER_DISCARDS
diff --git a/scripts/module.lds.S b/scripts/module.lds.S
index da4bddd26171..bf5bcf2836d8 100644
--- a/scripts/module.lds.S
+++ b/scripts/module.lds.S
@@ -3,6 +3,12 @@
  * Archs are free to supply their own linker scripts.  ld will
  * combine them automatically.
  */
+#ifdef CONFIG_UNWIND_TABLES
+#define DISCARD_EH_FRAME
+#else
+#define DISCARD_EH_FRAME	*(.eh_frame)
+#endif
+
 SECTIONS {
 	/DISCARD/ : {
 		*(.discard)
-- 
cgit v1.2.3


From c2465f95c4e73af1a6564b00ebc9acc15485edf0 Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com>
Date: Wed, 9 Nov 2022 11:47:20 -0600
Subject: ACPI: Enable FPDT on arm64

FPDT provides some boot timing records useful for analyzing
parts of the UEFI boot stack. Given the existing code works
on arm64, and allows reading the values without utilizing
/dev/mem it seems like a good idea to turn it on.

Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Acked-by: Sudeep Holla <sudeep.holla@arm.com>
Link: https://lore.kernel.org/r/20221109174720.203723-1-jeremy.linton@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 Documentation/arm64/acpi_object_usage.rst | 2 +-
 drivers/acpi/Kconfig                      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/Documentation/arm64/acpi_object_usage.rst b/Documentation/arm64/acpi_object_usage.rst
index 0609da73970b..484ef9676653 100644
--- a/Documentation/arm64/acpi_object_usage.rst
+++ b/Documentation/arm64/acpi_object_usage.rst
@@ -163,7 +163,7 @@ FPDT   Section 5.2.23 (signature == "FPDT")
 
        **Firmware Performance Data Table**
 
-       Optional, not currently supported.
+       Optional, useful for boot performance profiling.
 
 GTDT   Section 5.2.24 (signature == "GTDT")
 
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 473241b5193f..1cc11d2a2a88 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -90,7 +90,7 @@ config ACPI_SPCR_TABLE
 
 config ACPI_FPDT
 	bool "ACPI Firmware Performance Data Table (FPDT) support"
-	depends on X86_64
+	depends on X86_64 || ARM64
 	help
 	  Enable support for the Firmware Performance Data Table (FPDT).
 	  This table provides information on the timing of the system
-- 
cgit v1.2.3


From 39522031798dbb53b169b95d4ab25b53301d4eaf Mon Sep 17 00:00:00 2001
From: Besar Wicaksono <bwicaksono@nvidia.com>
Date: Fri, 11 Nov 2022 17:43:23 -0600
Subject: ACPI: APMT: Fix kerneldoc and indentation

Add missing kerneldoc and fix alignment on one of the arguments of
apmt_add_platform_device function.

Signed-off-by: Besar Wicaksono <bwicaksono@nvidia.com>
Link: https://lore.kernel.org/r/20221111234323.16182-1-bwicaksono@nvidia.com
[will: Fixed up additional indentation issue]
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/acpi/arm64/apmt.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/acpi/arm64/apmt.c b/drivers/acpi/arm64/apmt.c
index f55167ca51e7..8cab69fa5d59 100644
--- a/drivers/acpi/arm64/apmt.c
+++ b/drivers/acpi/arm64/apmt.c
@@ -24,7 +24,7 @@
 static struct acpi_table_header *apmt_table;
 
 static int __init apmt_init_resources(struct resource *res,
-					      struct acpi_apmt_node *node)
+				      struct acpi_apmt_node *node)
 {
 	int irq, trigger;
 	int num_res = 0;
@@ -66,11 +66,12 @@ static int __init apmt_init_resources(struct resource *res,
 /**
  * apmt_add_platform_device() - Allocate a platform device for APMT node
  * @node: Pointer to device ACPI APMT node
+ * @fwnode: fwnode associated with the APMT node
  *
  * Returns: 0 on success, <0 failure
  */
 static int __init apmt_add_platform_device(struct acpi_apmt_node *node,
-							struct fwnode_handle *fwnode)
+					   struct fwnode_handle *fwnode)
 {
 	struct platform_device *pdev;
 	int ret, count;
-- 
cgit v1.2.3


From facafab7611f7b872c6b9eeaff53461ef11f482e Mon Sep 17 00:00:00 2001
From: Yuan Can <yuancan@huawei.com>
Date: Tue, 15 Nov 2022 07:02:06 +0000
Subject: perf: arm_dsu: Fix hotplug callback leak in dsu_pmu_init()

dsu_pmu_init() won't remove the callback added by cpuhp_setup_state_multi()
when platform_driver_register() failed. Remove the callback by
cpuhp_remove_multi_state() in fail path.

Similar to the handling of arm_ccn_init() in commit 26242b330093 ("bus:
arm-ccn: Prevent hotplug callback leak")

Fixes: 7520fa99246d ("perf: ARM DynamIQ Shared Unit PMU support")
Signed-off-by: Yuan Can <yuancan@huawei.com>
Acked-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Link: https://lore.kernel.org/r/20221115070207.32634-2-yuancan@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_dsu_pmu.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c
index 4a15c86f45ef..fe2abb412c00 100644
--- a/drivers/perf/arm_dsu_pmu.c
+++ b/drivers/perf/arm_dsu_pmu.c
@@ -858,7 +858,11 @@ static int __init dsu_pmu_init(void)
 	if (ret < 0)
 		return ret;
 	dsu_pmu_cpuhp_state = ret;
-	return platform_driver_register(&dsu_pmu_driver);
+	ret = platform_driver_register(&dsu_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(dsu_pmu_cpuhp_state);
+
+	return ret;
 }
 
 static void __exit dsu_pmu_exit(void)
-- 
cgit v1.2.3


From 973ae93d80d9d262f695eb485a1902b74c4b9098 Mon Sep 17 00:00:00 2001
From: Yuan Can <yuancan@huawei.com>
Date: Tue, 15 Nov 2022 07:02:07 +0000
Subject: drivers: perf: marvell_cn10k: Fix hotplug callback leak in
 tad_pmu_init()

tad_pmu_init() won't remove the callback added by cpuhp_setup_state_multi()
when platform_driver_register() failed. Remove the callback by
cpuhp_remove_multi_state() in fail path.

Similar to the handling of arm_ccn_init() in commit 26242b330093 ("bus:
arm-ccn: Prevent hotplug callback leak")

Fixes: 036a7584bede ("drivers: perf: Add LLC-TAD perf counter support")
Signed-off-by: Yuan Can <yuancan@huawei.com>
Link: https://lore.kernel.org/r/20221115070207.32634-3-yuancan@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/marvell_cn10k_tad_pmu.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c
index 69c3050a4348..a1166afb3702 100644
--- a/drivers/perf/marvell_cn10k_tad_pmu.c
+++ b/drivers/perf/marvell_cn10k_tad_pmu.c
@@ -408,7 +408,11 @@ static int __init tad_pmu_init(void)
 	if (ret < 0)
 		return ret;
 	tad_pmu_cpuhp_state = ret;
-	return platform_driver_register(&tad_pmu_driver);
+	ret = platform_driver_register(&tad_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(tad_pmu_cpuhp_state);
+
+	return ret;
 }
 
 static void __exit tad_pmu_exit(void)
-- 
cgit v1.2.3


From d9f564c966e63925aac4ba273a9319d7fb6f4b4e Mon Sep 17 00:00:00 2001
From: Shang XiaoJing <shangxiaojing@huawei.com>
Date: Tue, 15 Nov 2022 19:55:39 +0800
Subject: perf/arm_dmc620: Fix hotplug callback leak in dmc620_pmu_init()

dmc620_pmu_init() won't remove the callback added by
cpuhp_setup_state_multi() when platform_driver_register() failed. Remove
the callback by cpuhp_remove_multi_state() in fail path.

Similar to the handling of arm_ccn_init() in commit 26242b330093 ("bus:
arm-ccn: Prevent hotplug callback leak")

Fixes: 53c218da220c ("driver/perf: Add PMU driver for the ARM DMC-620 memory controller")
Signed-off-by: Shang XiaoJing <shangxiaojing@huawei.com>
Reviewed-by: Punit Agrawal <punit.agrawal@bytedance.com>
Link: https://lore.kernel.org/r/20221115115540.6245-2-shangxiaojing@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_dmc620_pmu.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
index 280a6ae3e27c..54aa4658fb36 100644
--- a/drivers/perf/arm_dmc620_pmu.c
+++ b/drivers/perf/arm_dmc620_pmu.c
@@ -725,6 +725,8 @@ static struct platform_driver dmc620_pmu_driver = {
 
 static int __init dmc620_pmu_init(void)
 {
+	int ret;
+
 	cpuhp_state_num = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
 				      DMC620_DRVNAME,
 				      NULL,
@@ -732,7 +734,11 @@ static int __init dmc620_pmu_init(void)
 	if (cpuhp_state_num < 0)
 		return cpuhp_state_num;
 
-	return platform_driver_register(&dmc620_pmu_driver);
+	ret = platform_driver_register(&dmc620_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(cpuhp_state_num);
+
+	return ret;
 }
 
 static void __exit dmc620_pmu_exit(void)
-- 
cgit v1.2.3


From 6f2d566b46436a50a80d6445e82879686b89588c Mon Sep 17 00:00:00 2001
From: Shang XiaoJing <shangxiaojing@huawei.com>
Date: Tue, 15 Nov 2022 19:55:40 +0800
Subject: perf/smmuv3: Fix hotplug callback leak in arm_smmu_pmu_init()

arm_smmu_pmu_init() won't remove the callback added by
cpuhp_setup_state_multi() when platform_driver_register() failed. Remove
the callback by cpuhp_remove_multi_state() in fail path.

Similar to the handling of arm_ccn_init() in commit 26242b330093 ("bus:
arm-ccn: Prevent hotplug callback leak")

Fixes: 7d839b4b9e00 ("perf/smmuv3: Add arm64 smmuv3 pmu driver")
Signed-off-by: Shang XiaoJing <shangxiaojing@huawei.com>
Reviewed-by: Punit Agrawal <punit.agrawal@bytedance.com>
Link: https://lore.kernel.org/r/20221115115540.6245-3-shangxiaojing@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_smmuv3_pmu.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
index 00d4c45a8017..25a269d431e4 100644
--- a/drivers/perf/arm_smmuv3_pmu.c
+++ b/drivers/perf/arm_smmuv3_pmu.c
@@ -959,6 +959,8 @@ static struct platform_driver smmu_pmu_driver = {
 
 static int __init arm_smmu_pmu_init(void)
 {
+	int ret;
+
 	cpuhp_state_num = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
 						  "perf/arm/pmcg:online",
 						  NULL,
@@ -966,7 +968,11 @@ static int __init arm_smmu_pmu_init(void)
 	if (cpuhp_state_num < 0)
 		return cpuhp_state_num;
 
-	return platform_driver_register(&smmu_pmu_driver);
+	ret = platform_driver_register(&smmu_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(cpuhp_state_num);
+
+	return ret;
 }
 module_init(arm_smmu_pmu_init);
 
-- 
cgit v1.2.3


From e37dfd65731dc4f001fa7dfa7f705e6840017d5a Mon Sep 17 00:00:00 2001
From: Besar Wicaksono <bwicaksono@nvidia.com>
Date: Fri, 11 Nov 2022 16:23:28 -0600
Subject: perf: arm_cspmu: Add support for ARM CoreSight PMU driver

Add support for ARM CoreSight PMU driver framework and interfaces.
The driver provides generic implementation to operate uncore PMU based
on ARM CoreSight PMU architecture. The driver also provides interface
to get vendor/implementation specific information, for example event
attributes and formating.

The specification used in this implementation can be found below:
 * ACPI Arm Performance Monitoring Unit table:
        https://developer.arm.com/documentation/den0117/latest
 * ARM Coresight PMU architecture:
        https://developer.arm.com/documentation/ihi0091/latest

Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Besar Wicaksono <bwicaksono@nvidia.com>
Link: https://lore.kernel.org/r/20221111222330.48602-2-bwicaksono@nvidia.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/Kconfig               |    2 +
 drivers/perf/Makefile              |    1 +
 drivers/perf/arm_cspmu/Kconfig     |   13 +
 drivers/perf/arm_cspmu/Makefile    |    6 +
 drivers/perf/arm_cspmu/arm_cspmu.c | 1292 ++++++++++++++++++++++++++++++++++++
 drivers/perf/arm_cspmu/arm_cspmu.h |  151 +++++
 6 files changed, 1465 insertions(+)
 create mode 100644 drivers/perf/arm_cspmu/Kconfig
 create mode 100644 drivers/perf/arm_cspmu/Makefile
 create mode 100644 drivers/perf/arm_cspmu/arm_cspmu.c
 create mode 100644 drivers/perf/arm_cspmu/arm_cspmu.h

(limited to 'drivers')

diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 341010f20b77..fa87dedf8c92 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -199,4 +199,6 @@ config MARVELL_CN10K_DDR_PMU
 	  Enable perf support for Marvell DDR Performance monitoring
 	  event on CN10K platform.
 
+source "drivers/perf/arm_cspmu/Kconfig"
+
 endmenu
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 050d04ee19dd..35bb0d979a70 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -21,3 +21,4 @@ obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o
 obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o
 obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o
 obj-$(CONFIG_ALIBABA_UNCORE_DRW_PMU) += alibaba_uncore_drw_pmu.o
+obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu/
diff --git a/drivers/perf/arm_cspmu/Kconfig b/drivers/perf/arm_cspmu/Kconfig
new file mode 100644
index 000000000000..058223bef661
--- /dev/null
+++ b/drivers/perf/arm_cspmu/Kconfig
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
+	tristate "ARM Coresight Architecture PMU"
+	depends on ACPI
+	depends on ACPI_APMT || COMPILE_TEST
+	help
+	  Provides support for performance monitoring unit (PMU) devices
+	  based on ARM CoreSight PMU architecture. Note that this PMU
+	  architecture does not have relationship with the ARM CoreSight
+	  Self-Hosted Tracing.
diff --git a/drivers/perf/arm_cspmu/Makefile b/drivers/perf/arm_cspmu/Makefile
new file mode 100644
index 000000000000..9bc76de4002d
--- /dev/null
+++ b/drivers/perf/arm_cspmu/Makefile
@@ -0,0 +1,6 @@
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += \
+	arm_cspmu.o
diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c
new file mode 100644
index 000000000000..e8f57aa9f047
--- /dev/null
+++ b/drivers/perf/arm_cspmu/arm_cspmu.c
@@ -0,0 +1,1292 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM CoreSight Architecture PMU driver.
+ *
+ * This driver adds support for uncore PMU based on ARM CoreSight Performance
+ * Monitoring Unit Architecture. The PMU is accessible via MMIO registers and
+ * like other uncore PMUs, it does not support process specific events and
+ * cannot be used in sampling mode.
+ *
+ * This code is based on other uncore PMUs like ARM DSU PMU. It provides a
+ * generic implementation to operate the PMU according to CoreSight PMU
+ * architecture and ACPI ARM PMU table (APMT) documents below:
+ *   - ARM CoreSight PMU architecture document number: ARM IHI 0091 A.a-00bet0.
+ *   - APMT document number: ARM DEN0117.
+ *
+ * The user should refer to the vendor technical documentation to get details
+ * about the supported events.
+ *
+ * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ *
+ */
+
+#include <linux/acpi.h>
+#include <linux/cacheinfo.h>
+#include <linux/ctype.h>
+#include <linux/interrupt.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/module.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <acpi/processor.h>
+
+#include "arm_cspmu.h"
+
+#define PMUNAME "arm_cspmu"
+#define DRVNAME "arm-cs-arch-pmu"
+
+#define ARM_CSPMU_CPUMASK_ATTR(_name, _config)			\
+	ARM_CSPMU_EXT_ATTR(_name, arm_cspmu_cpumask_show,	\
+				(unsigned long)_config)
+
+/*
+ * CoreSight PMU Arch register offsets.
+ */
+#define PMEVCNTR_LO					0x0
+#define PMEVCNTR_HI					0x4
+#define PMEVTYPER					0x400
+#define PMCCFILTR					0x47C
+#define PMEVFILTR					0xA00
+#define PMCNTENSET					0xC00
+#define PMCNTENCLR					0xC20
+#define PMINTENSET					0xC40
+#define PMINTENCLR					0xC60
+#define PMOVSCLR					0xC80
+#define PMOVSSET					0xCC0
+#define PMCFGR						0xE00
+#define PMCR						0xE04
+#define PMIIDR						0xE08
+
+/* PMCFGR register field */
+#define PMCFGR_NCG					GENMASK(31, 28)
+#define PMCFGR_HDBG					BIT(24)
+#define PMCFGR_TRO					BIT(23)
+#define PMCFGR_SS					BIT(22)
+#define PMCFGR_FZO					BIT(21)
+#define PMCFGR_MSI					BIT(20)
+#define PMCFGR_UEN					BIT(19)
+#define PMCFGR_NA					BIT(17)
+#define PMCFGR_EX					BIT(16)
+#define PMCFGR_CCD					BIT(15)
+#define PMCFGR_CC					BIT(14)
+#define PMCFGR_SIZE					GENMASK(13, 8)
+#define PMCFGR_N					GENMASK(7, 0)
+
+/* PMCR register field */
+#define PMCR_TRO					BIT(11)
+#define PMCR_HDBG					BIT(10)
+#define PMCR_FZO					BIT(9)
+#define PMCR_NA						BIT(8)
+#define PMCR_DP						BIT(5)
+#define PMCR_X						BIT(4)
+#define PMCR_D						BIT(3)
+#define PMCR_C						BIT(2)
+#define PMCR_P						BIT(1)
+#define PMCR_E						BIT(0)
+
+/* Each SET/CLR register supports up to 32 counters. */
+#define ARM_CSPMU_SET_CLR_COUNTER_SHIFT		5
+#define ARM_CSPMU_SET_CLR_COUNTER_NUM		\
+	(1 << ARM_CSPMU_SET_CLR_COUNTER_SHIFT)
+
+/* Convert counter idx into SET/CLR register number. */
+#define COUNTER_TO_SET_CLR_ID(idx)			\
+	(idx >> ARM_CSPMU_SET_CLR_COUNTER_SHIFT)
+
+/* Convert counter idx into SET/CLR register bit. */
+#define COUNTER_TO_SET_CLR_BIT(idx)			\
+	(idx & (ARM_CSPMU_SET_CLR_COUNTER_NUM - 1))
+
+#define ARM_CSPMU_ACTIVE_CPU_MASK		0x0
+#define ARM_CSPMU_ASSOCIATED_CPU_MASK		0x1
+
+/* Check if field f in flags is set with value v */
+#define CHECK_APMT_FLAG(flags, f, v) \
+	((flags & (ACPI_APMT_FLAGS_ ## f)) == (ACPI_APMT_FLAGS_ ## f ## _ ## v))
+
+/* Check and use default if implementer doesn't provide attribute callback */
+#define CHECK_DEFAULT_IMPL_OPS(ops, callback)			\
+	do {							\
+		if (!ops->callback)				\
+			ops->callback = arm_cspmu_ ## callback;	\
+	} while (0)
+
+/*
+ * Maximum poll count for reading counter value using high-low-high sequence.
+ */
+#define HILOHI_MAX_POLL	1000
+
+static unsigned long arm_cspmu_cpuhp_state;
+
+/*
+ * In CoreSight PMU architecture, all of the MMIO registers are 32-bit except
+ * counter register. The counter register can be implemented as 32-bit or 64-bit
+ * register depending on the value of PMCFGR.SIZE field. For 64-bit access,
+ * single-copy 64-bit atomic support is implementation defined. APMT node flag
+ * is used to identify if the PMU supports 64-bit single copy atomic. If 64-bit
+ * single copy atomic is not supported, the driver treats the register as a pair
+ * of 32-bit register.
+ */
+
+/*
+ * Read 64-bit register as a pair of 32-bit registers using hi-lo-hi sequence.
+ */
+static u64 read_reg64_hilohi(const void __iomem *addr, u32 max_poll_count)
+{
+	u32 val_lo, val_hi;
+	u64 val;
+
+	/* Use high-low-high sequence to avoid tearing */
+	do {
+		if (max_poll_count-- == 0) {
+			pr_err("ARM CSPMU: timeout hi-low-high sequence\n");
+			return 0;
+		}
+
+		val_hi = readl(addr + 4);
+		val_lo = readl(addr);
+	} while (val_hi != readl(addr + 4));
+
+	val = (((u64)val_hi << 32) | val_lo);
+
+	return val;
+}
+
+/* Check if PMU supports 64-bit single copy atomic. */
+static inline bool supports_64bit_atomics(const struct arm_cspmu *cspmu)
+{
+	return CHECK_APMT_FLAG(cspmu->apmt_node->flags, ATOMIC, SUPP);
+}
+
+/* Check if cycle counter is supported. */
+static inline bool supports_cycle_counter(const struct arm_cspmu *cspmu)
+{
+	return (cspmu->pmcfgr & PMCFGR_CC);
+}
+
+/* Get counter size, which is (PMCFGR_SIZE + 1). */
+static inline u32 counter_size(const struct arm_cspmu *cspmu)
+{
+	return FIELD_GET(PMCFGR_SIZE, cspmu->pmcfgr) + 1;
+}
+
+/* Get counter mask. */
+static inline u64 counter_mask(const struct arm_cspmu *cspmu)
+{
+	return GENMASK_ULL(counter_size(cspmu) - 1, 0);
+}
+
+/* Check if counter is implemented as 64-bit register. */
+static inline bool use_64b_counter_reg(const struct arm_cspmu *cspmu)
+{
+	return (counter_size(cspmu) > 32);
+}
+
+ssize_t arm_cspmu_sysfs_event_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct dev_ext_attribute *eattr =
+		container_of(attr, struct dev_ext_attribute, attr);
+	return sysfs_emit(buf, "event=0x%llx\n",
+			  (unsigned long long)eattr->var);
+}
+EXPORT_SYMBOL_GPL(arm_cspmu_sysfs_event_show);
+
+/* Default event list. */
+static struct attribute *arm_cspmu_event_attrs[] = {
+	ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
+	NULL,
+};
+
+static struct attribute **
+arm_cspmu_get_event_attrs(const struct arm_cspmu *cspmu)
+{
+	struct attribute **attrs;
+
+	attrs = devm_kmemdup(cspmu->dev, arm_cspmu_event_attrs,
+		sizeof(arm_cspmu_event_attrs), GFP_KERNEL);
+
+	return attrs;
+}
+
+static umode_t
+arm_cspmu_event_attr_is_visible(struct kobject *kobj,
+				struct attribute *attr, int unused)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct arm_cspmu *cspmu = to_arm_cspmu(dev_get_drvdata(dev));
+	struct perf_pmu_events_attr *eattr;
+
+	eattr = container_of(attr, typeof(*eattr), attr.attr);
+
+	/* Hide cycle event if not supported */
+	if (!supports_cycle_counter(cspmu) &&
+	    eattr->id == ARM_CSPMU_EVT_CYCLES_DEFAULT)
+		return 0;
+
+	return attr->mode;
+}
+
+ssize_t arm_cspmu_sysfs_format_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct dev_ext_attribute *eattr =
+		container_of(attr, struct dev_ext_attribute, attr);
+	return sysfs_emit(buf, "%s\n", (char *)eattr->var);
+}
+EXPORT_SYMBOL_GPL(arm_cspmu_sysfs_format_show);
+
+static struct attribute *arm_cspmu_format_attrs[] = {
+	ARM_CSPMU_FORMAT_EVENT_ATTR,
+	ARM_CSPMU_FORMAT_FILTER_ATTR,
+	NULL,
+};
+
+static struct attribute **
+arm_cspmu_get_format_attrs(const struct arm_cspmu *cspmu)
+{
+	struct attribute **attrs;
+
+	attrs = devm_kmemdup(cspmu->dev, arm_cspmu_format_attrs,
+		sizeof(arm_cspmu_format_attrs), GFP_KERNEL);
+
+	return attrs;
+}
+
+static u32 arm_cspmu_event_type(const struct perf_event *event)
+{
+	return event->attr.config & ARM_CSPMU_EVENT_MASK;
+}
+
+static bool arm_cspmu_is_cycle_counter_event(const struct perf_event *event)
+{
+	return (event->attr.config == ARM_CSPMU_EVT_CYCLES_DEFAULT);
+}
+
+static u32 arm_cspmu_event_filter(const struct perf_event *event)
+{
+	return event->attr.config1 & ARM_CSPMU_FILTER_MASK;
+}
+
+static ssize_t arm_cspmu_identifier_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *page)
+{
+	struct arm_cspmu *cspmu = to_arm_cspmu(dev_get_drvdata(dev));
+
+	return sysfs_emit(page, "%s\n", cspmu->identifier);
+}
+
+static struct device_attribute arm_cspmu_identifier_attr =
+	__ATTR(identifier, 0444, arm_cspmu_identifier_show, NULL);
+
+static struct attribute *arm_cspmu_identifier_attrs[] = {
+	&arm_cspmu_identifier_attr.attr,
+	NULL,
+};
+
+static struct attribute_group arm_cspmu_identifier_attr_group = {
+	.attrs = arm_cspmu_identifier_attrs,
+};
+
+static const char *arm_cspmu_get_identifier(const struct arm_cspmu *cspmu)
+{
+	const char *identifier =
+		devm_kasprintf(cspmu->dev, GFP_KERNEL, "%x",
+			       cspmu->impl.pmiidr);
+	return identifier;
+}
+
+static const char *arm_cspmu_type_str[ACPI_APMT_NODE_TYPE_COUNT] = {
+	"mc",
+	"smmu",
+	"pcie",
+	"acpi",
+	"cache",
+};
+
+static const char *arm_cspmu_get_name(const struct arm_cspmu *cspmu)
+{
+	struct device *dev;
+	struct acpi_apmt_node *apmt_node;
+	u8 pmu_type;
+	char *name;
+	char acpi_hid_string[ACPI_ID_LEN] = { 0 };
+	static atomic_t pmu_idx[ACPI_APMT_NODE_TYPE_COUNT] = { 0 };
+
+	dev = cspmu->dev;
+	apmt_node = cspmu->apmt_node;
+	pmu_type = apmt_node->type;
+
+	if (pmu_type >= ACPI_APMT_NODE_TYPE_COUNT) {
+		dev_err(dev, "unsupported PMU type-%u\n", pmu_type);
+		return NULL;
+	}
+
+	if (pmu_type == ACPI_APMT_NODE_TYPE_ACPI) {
+		memcpy(acpi_hid_string,
+			&apmt_node->inst_primary,
+			sizeof(apmt_node->inst_primary));
+		name = devm_kasprintf(dev, GFP_KERNEL, "%s_%s_%s_%u", PMUNAME,
+				      arm_cspmu_type_str[pmu_type],
+				      acpi_hid_string,
+				      apmt_node->inst_secondary);
+	} else {
+		name = devm_kasprintf(dev, GFP_KERNEL, "%s_%s_%d", PMUNAME,
+				      arm_cspmu_type_str[pmu_type],
+				      atomic_fetch_inc(&pmu_idx[pmu_type]));
+	}
+
+	return name;
+}
+
+static ssize_t arm_cspmu_cpumask_show(struct device *dev,
+				      struct device_attribute *attr,
+				      char *buf)
+{
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct arm_cspmu *cspmu = to_arm_cspmu(pmu);
+	struct dev_ext_attribute *eattr =
+		container_of(attr, struct dev_ext_attribute, attr);
+	unsigned long mask_id = (unsigned long)eattr->var;
+	const cpumask_t *cpumask;
+
+	switch (mask_id) {
+	case ARM_CSPMU_ACTIVE_CPU_MASK:
+		cpumask = &cspmu->active_cpu;
+		break;
+	case ARM_CSPMU_ASSOCIATED_CPU_MASK:
+		cpumask = &cspmu->associated_cpus;
+		break;
+	default:
+		return 0;
+	}
+	return cpumap_print_to_pagebuf(true, buf, cpumask);
+}
+
+static struct attribute *arm_cspmu_cpumask_attrs[] = {
+	ARM_CSPMU_CPUMASK_ATTR(cpumask, ARM_CSPMU_ACTIVE_CPU_MASK),
+	ARM_CSPMU_CPUMASK_ATTR(associated_cpus, ARM_CSPMU_ASSOCIATED_CPU_MASK),
+	NULL,
+};
+
+static struct attribute_group arm_cspmu_cpumask_attr_group = {
+	.attrs = arm_cspmu_cpumask_attrs,
+};
+
+struct impl_match {
+	u32 pmiidr;
+	u32 mask;
+	int (*impl_init_ops)(struct arm_cspmu *cspmu);
+};
+
+static const struct impl_match impl_match[] = {
+	{}
+};
+
+static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu)
+{
+	int ret;
+	struct acpi_apmt_node *apmt_node = cspmu->apmt_node;
+	struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
+	const struct impl_match *match = impl_match;
+
+	/*
+	 * Get PMU implementer and product id from APMT node.
+	 * If APMT node doesn't have implementer/product id, try get it
+	 * from PMIIDR.
+	 */
+	cspmu->impl.pmiidr =
+		(apmt_node->impl_id) ? apmt_node->impl_id :
+				       readl(cspmu->base0 + PMIIDR);
+
+	/* Find implementer specific attribute ops. */
+	for (; match->pmiidr; match++) {
+		const u32 mask = match->mask;
+
+		if ((match->pmiidr & mask) == (cspmu->impl.pmiidr & mask)) {
+			ret = match->impl_init_ops(cspmu);
+			if (ret)
+				return ret;
+
+			break;
+		}
+	}
+
+	/* Use default callbacks if implementer doesn't provide one. */
+	CHECK_DEFAULT_IMPL_OPS(impl_ops, get_event_attrs);
+	CHECK_DEFAULT_IMPL_OPS(impl_ops, get_format_attrs);
+	CHECK_DEFAULT_IMPL_OPS(impl_ops, get_identifier);
+	CHECK_DEFAULT_IMPL_OPS(impl_ops, get_name);
+	CHECK_DEFAULT_IMPL_OPS(impl_ops, is_cycle_counter_event);
+	CHECK_DEFAULT_IMPL_OPS(impl_ops, event_type);
+	CHECK_DEFAULT_IMPL_OPS(impl_ops, event_filter);
+	CHECK_DEFAULT_IMPL_OPS(impl_ops, event_attr_is_visible);
+
+	return 0;
+}
+
+static struct attribute_group *
+arm_cspmu_alloc_event_attr_group(struct arm_cspmu *cspmu)
+{
+	struct attribute_group *event_group;
+	struct device *dev = cspmu->dev;
+	const struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
+
+	event_group =
+		devm_kzalloc(dev, sizeof(struct attribute_group), GFP_KERNEL);
+	if (!event_group)
+		return NULL;
+
+	event_group->name = "events";
+	event_group->is_visible = impl_ops->event_attr_is_visible;
+	event_group->attrs = impl_ops->get_event_attrs(cspmu);
+
+	if (!event_group->attrs)
+		return NULL;
+
+	return event_group;
+}
+
+static struct attribute_group *
+arm_cspmu_alloc_format_attr_group(struct arm_cspmu *cspmu)
+{
+	struct attribute_group *format_group;
+	struct device *dev = cspmu->dev;
+
+	format_group =
+		devm_kzalloc(dev, sizeof(struct attribute_group), GFP_KERNEL);
+	if (!format_group)
+		return NULL;
+
+	format_group->name = "format";
+	format_group->attrs = cspmu->impl.ops.get_format_attrs(cspmu);
+
+	if (!format_group->attrs)
+		return NULL;
+
+	return format_group;
+}
+
+static struct attribute_group **
+arm_cspmu_alloc_attr_group(struct arm_cspmu *cspmu)
+{
+	struct attribute_group **attr_groups = NULL;
+	struct device *dev = cspmu->dev;
+	const struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
+	int ret;
+
+	ret = arm_cspmu_init_impl_ops(cspmu);
+	if (ret)
+		return NULL;
+
+	cspmu->identifier = impl_ops->get_identifier(cspmu);
+	cspmu->name = impl_ops->get_name(cspmu);
+
+	if (!cspmu->identifier || !cspmu->name)
+		return NULL;
+
+	attr_groups = devm_kcalloc(dev, 5, sizeof(struct attribute_group *),
+				   GFP_KERNEL);
+	if (!attr_groups)
+		return NULL;
+
+	attr_groups[0] = arm_cspmu_alloc_event_attr_group(cspmu);
+	attr_groups[1] = arm_cspmu_alloc_format_attr_group(cspmu);
+	attr_groups[2] = &arm_cspmu_identifier_attr_group;
+	attr_groups[3] = &arm_cspmu_cpumask_attr_group;
+
+	if (!attr_groups[0] || !attr_groups[1])
+		return NULL;
+
+	return attr_groups;
+}
+
+static inline void arm_cspmu_reset_counters(struct arm_cspmu *cspmu)
+{
+	u32 pmcr = 0;
+
+	pmcr |= PMCR_P;
+	pmcr |= PMCR_C;
+	writel(pmcr, cspmu->base0 + PMCR);
+}
+
+static inline void arm_cspmu_start_counters(struct arm_cspmu *cspmu)
+{
+	writel(PMCR_E, cspmu->base0 + PMCR);
+}
+
+static inline void arm_cspmu_stop_counters(struct arm_cspmu *cspmu)
+{
+	writel(0, cspmu->base0 + PMCR);
+}
+
+static void arm_cspmu_enable(struct pmu *pmu)
+{
+	bool disabled;
+	struct arm_cspmu *cspmu = to_arm_cspmu(pmu);
+
+	disabled = bitmap_empty(cspmu->hw_events.used_ctrs,
+				cspmu->num_logical_ctrs);
+
+	if (disabled)
+		return;
+
+	arm_cspmu_start_counters(cspmu);
+}
+
+static void arm_cspmu_disable(struct pmu *pmu)
+{
+	struct arm_cspmu *cspmu = to_arm_cspmu(pmu);
+
+	arm_cspmu_stop_counters(cspmu);
+}
+
+static int arm_cspmu_get_event_idx(struct arm_cspmu_hw_events *hw_events,
+				struct perf_event *event)
+{
+	int idx;
+	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
+
+	if (supports_cycle_counter(cspmu)) {
+		if (cspmu->impl.ops.is_cycle_counter_event(event)) {
+			/* Search for available cycle counter. */
+			if (test_and_set_bit(cspmu->cycle_counter_logical_idx,
+					     hw_events->used_ctrs))
+				return -EAGAIN;
+
+			return cspmu->cycle_counter_logical_idx;
+		}
+
+		/*
+		 * Search a regular counter from the used counter bitmap.
+		 * The cycle counter divides the bitmap into two parts. Search
+		 * the first then second half to exclude the cycle counter bit.
+		 */
+		idx = find_first_zero_bit(hw_events->used_ctrs,
+					  cspmu->cycle_counter_logical_idx);
+		if (idx >= cspmu->cycle_counter_logical_idx) {
+			idx = find_next_zero_bit(
+				hw_events->used_ctrs,
+				cspmu->num_logical_ctrs,
+				cspmu->cycle_counter_logical_idx + 1);
+		}
+	} else {
+		idx = find_first_zero_bit(hw_events->used_ctrs,
+					  cspmu->num_logical_ctrs);
+	}
+
+	if (idx >= cspmu->num_logical_ctrs)
+		return -EAGAIN;
+
+	set_bit(idx, hw_events->used_ctrs);
+
+	return idx;
+}
+
+static bool arm_cspmu_validate_event(struct pmu *pmu,
+				 struct arm_cspmu_hw_events *hw_events,
+				 struct perf_event *event)
+{
+	if (is_software_event(event))
+		return true;
+
+	/* Reject groups spanning multiple HW PMUs. */
+	if (event->pmu != pmu)
+		return false;
+
+	return (arm_cspmu_get_event_idx(hw_events, event) >= 0);
+}
+
+/*
+ * Make sure the group of events can be scheduled at once
+ * on the PMU.
+ */
+static bool arm_cspmu_validate_group(struct perf_event *event)
+{
+	struct perf_event *sibling, *leader = event->group_leader;
+	struct arm_cspmu_hw_events fake_hw_events;
+
+	if (event->group_leader == event)
+		return true;
+
+	memset(&fake_hw_events, 0, sizeof(fake_hw_events));
+
+	if (!arm_cspmu_validate_event(event->pmu, &fake_hw_events, leader))
+		return false;
+
+	for_each_sibling_event(sibling, leader) {
+		if (!arm_cspmu_validate_event(event->pmu, &fake_hw_events,
+						  sibling))
+			return false;
+	}
+
+	return arm_cspmu_validate_event(event->pmu, &fake_hw_events, event);
+}
+
+static int arm_cspmu_event_init(struct perf_event *event)
+{
+	struct arm_cspmu *cspmu;
+	struct hw_perf_event *hwc = &event->hw;
+
+	cspmu = to_arm_cspmu(event->pmu);
+
+	/*
+	 * Following other "uncore" PMUs, we do not support sampling mode or
+	 * attach to a task (per-process mode).
+	 */
+	if (is_sampling_event(event)) {
+		dev_dbg(cspmu->pmu.dev,
+			"Can't support sampling events\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (event->cpu < 0 || event->attach_state & PERF_ATTACH_TASK) {
+		dev_dbg(cspmu->pmu.dev,
+			"Can't support per-task counters\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Make sure the CPU assignment is on one of the CPUs associated with
+	 * this PMU.
+	 */
+	if (!cpumask_test_cpu(event->cpu, &cspmu->associated_cpus)) {
+		dev_dbg(cspmu->pmu.dev,
+			"Requested cpu is not associated with the PMU\n");
+		return -EINVAL;
+	}
+
+	/* Enforce the current active CPU to handle the events in this PMU. */
+	event->cpu = cpumask_first(&cspmu->active_cpu);
+	if (event->cpu >= nr_cpu_ids)
+		return -EINVAL;
+
+	if (!arm_cspmu_validate_group(event))
+		return -EINVAL;
+
+	/*
+	 * The logical counter id is tracked with hw_perf_event.extra_reg.idx.
+	 * The physical counter id is tracked with hw_perf_event.idx.
+	 * We don't assign an index until we actually place the event onto
+	 * hardware. Use -1 to signify that we haven't decided where to put it
+	 * yet.
+	 */
+	hwc->idx = -1;
+	hwc->extra_reg.idx = -1;
+	hwc->config = cspmu->impl.ops.event_type(event);
+
+	return 0;
+}
+
+static inline u32 counter_offset(u32 reg_sz, u32 ctr_idx)
+{
+	return (PMEVCNTR_LO + (reg_sz * ctr_idx));
+}
+
+static void arm_cspmu_write_counter(struct perf_event *event, u64 val)
+{
+	u32 offset;
+	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
+
+	if (use_64b_counter_reg(cspmu)) {
+		offset = counter_offset(sizeof(u64), event->hw.idx);
+
+		writeq(val, cspmu->base1 + offset);
+	} else {
+		offset = counter_offset(sizeof(u32), event->hw.idx);
+
+		writel(lower_32_bits(val), cspmu->base1 + offset);
+	}
+}
+
+static u64 arm_cspmu_read_counter(struct perf_event *event)
+{
+	u32 offset;
+	const void __iomem *counter_addr;
+	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
+
+	if (use_64b_counter_reg(cspmu)) {
+		offset = counter_offset(sizeof(u64), event->hw.idx);
+		counter_addr = cspmu->base1 + offset;
+
+		return supports_64bit_atomics(cspmu) ?
+			       readq(counter_addr) :
+			       read_reg64_hilohi(counter_addr, HILOHI_MAX_POLL);
+	}
+
+	offset = counter_offset(sizeof(u32), event->hw.idx);
+	return readl(cspmu->base1 + offset);
+}
+
+/*
+ * arm_cspmu_set_event_period: Set the period for the counter.
+ *
+ * To handle cases of extreme interrupt latency, we program
+ * the counter with half of the max count for the counters.
+ */
+static void arm_cspmu_set_event_period(struct perf_event *event)
+{
+	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
+	u64 val = counter_mask(cspmu) >> 1ULL;
+
+	local64_set(&event->hw.prev_count, val);
+	arm_cspmu_write_counter(event, val);
+}
+
+static void arm_cspmu_enable_counter(struct arm_cspmu *cspmu, int idx)
+{
+	u32 reg_id, reg_bit, inten_off, cnten_off;
+
+	reg_id = COUNTER_TO_SET_CLR_ID(idx);
+	reg_bit = COUNTER_TO_SET_CLR_BIT(idx);
+
+	inten_off = PMINTENSET + (4 * reg_id);
+	cnten_off = PMCNTENSET + (4 * reg_id);
+
+	writel(BIT(reg_bit), cspmu->base0 + inten_off);
+	writel(BIT(reg_bit), cspmu->base0 + cnten_off);
+}
+
+static void arm_cspmu_disable_counter(struct arm_cspmu *cspmu, int idx)
+{
+	u32 reg_id, reg_bit, inten_off, cnten_off;
+
+	reg_id = COUNTER_TO_SET_CLR_ID(idx);
+	reg_bit = COUNTER_TO_SET_CLR_BIT(idx);
+
+	inten_off = PMINTENCLR + (4 * reg_id);
+	cnten_off = PMCNTENCLR + (4 * reg_id);
+
+	writel(BIT(reg_bit), cspmu->base0 + cnten_off);
+	writel(BIT(reg_bit), cspmu->base0 + inten_off);
+}
+
+static void arm_cspmu_event_update(struct perf_event *event)
+{
+	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 delta, prev, now;
+
+	do {
+		prev = local64_read(&hwc->prev_count);
+		now = arm_cspmu_read_counter(event);
+	} while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);
+
+	delta = (now - prev) & counter_mask(cspmu);
+	local64_add(delta, &event->count);
+}
+
+static inline void arm_cspmu_set_event(struct arm_cspmu *cspmu,
+					struct hw_perf_event *hwc)
+{
+	u32 offset = PMEVTYPER + (4 * hwc->idx);
+
+	writel(hwc->config, cspmu->base0 + offset);
+}
+
+static inline void arm_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
+					   struct hw_perf_event *hwc,
+					   u32 filter)
+{
+	u32 offset = PMEVFILTR + (4 * hwc->idx);
+
+	writel(filter, cspmu->base0 + offset);
+}
+
+static inline void arm_cspmu_set_cc_filter(struct arm_cspmu *cspmu, u32 filter)
+{
+	u32 offset = PMCCFILTR;
+
+	writel(filter, cspmu->base0 + offset);
+}
+
+static void arm_cspmu_start(struct perf_event *event, int pmu_flags)
+{
+	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u32 filter;
+
+	/* We always reprogram the counter */
+	if (pmu_flags & PERF_EF_RELOAD)
+		WARN_ON(!(hwc->state & PERF_HES_UPTODATE));
+
+	arm_cspmu_set_event_period(event);
+
+	filter = cspmu->impl.ops.event_filter(event);
+
+	if (event->hw.extra_reg.idx == cspmu->cycle_counter_logical_idx) {
+		arm_cspmu_set_cc_filter(cspmu, filter);
+	} else {
+		arm_cspmu_set_event(cspmu, hwc);
+		arm_cspmu_set_ev_filter(cspmu, hwc, filter);
+	}
+
+	hwc->state = 0;
+
+	arm_cspmu_enable_counter(cspmu, hwc->idx);
+}
+
+static void arm_cspmu_stop(struct perf_event *event, int pmu_flags)
+{
+	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (hwc->state & PERF_HES_STOPPED)
+		return;
+
+	arm_cspmu_disable_counter(cspmu, hwc->idx);
+	arm_cspmu_event_update(event);
+
+	hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+static inline u32 to_phys_idx(struct arm_cspmu *cspmu, u32 idx)
+{
+	return (idx == cspmu->cycle_counter_logical_idx) ?
+		ARM_CSPMU_CYCLE_CNTR_IDX : idx;
+}
+
+static int arm_cspmu_add(struct perf_event *event, int flags)
+{
+	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
+	struct arm_cspmu_hw_events *hw_events = &cspmu->hw_events;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+
+	if (WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(),
+					   &cspmu->associated_cpus)))
+		return -ENOENT;
+
+	idx = arm_cspmu_get_event_idx(hw_events, event);
+	if (idx < 0)
+		return idx;
+
+	hw_events->events[idx] = event;
+	hwc->idx = to_phys_idx(cspmu, idx);
+	hwc->extra_reg.idx = idx;
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+	if (flags & PERF_EF_START)
+		arm_cspmu_start(event, PERF_EF_RELOAD);
+
+	/* Propagate changes to the userspace mapping. */
+	perf_event_update_userpage(event);
+
+	return 0;
+}
+
+static void arm_cspmu_del(struct perf_event *event, int flags)
+{
+	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
+	struct arm_cspmu_hw_events *hw_events = &cspmu->hw_events;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->extra_reg.idx;
+
+	arm_cspmu_stop(event, PERF_EF_UPDATE);
+
+	hw_events->events[idx] = NULL;
+
+	clear_bit(idx, hw_events->used_ctrs);
+
+	perf_event_update_userpage(event);
+}
+
+static void arm_cspmu_read(struct perf_event *event)
+{
+	arm_cspmu_event_update(event);
+}
+
+static struct arm_cspmu *arm_cspmu_alloc(struct platform_device *pdev)
+{
+	struct acpi_apmt_node *apmt_node;
+	struct arm_cspmu *cspmu;
+	struct device *dev;
+
+	dev = &pdev->dev;
+	apmt_node = *(struct acpi_apmt_node **)dev_get_platdata(dev);
+	if (!apmt_node) {
+		dev_err(dev, "failed to get APMT node\n");
+		return NULL;
+	}
+
+	cspmu = devm_kzalloc(dev, sizeof(*cspmu), GFP_KERNEL);
+	if (!cspmu)
+		return NULL;
+
+	cspmu->dev = dev;
+	cspmu->apmt_node = apmt_node;
+
+	platform_set_drvdata(pdev, cspmu);
+
+	return cspmu;
+}
+
+static int arm_cspmu_init_mmio(struct arm_cspmu *cspmu)
+{
+	struct device *dev;
+	struct platform_device *pdev;
+	struct acpi_apmt_node *apmt_node;
+
+	dev = cspmu->dev;
+	pdev = to_platform_device(dev);
+	apmt_node = cspmu->apmt_node;
+
+	/* Base address for page 0. */
+	cspmu->base0 = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(cspmu->base0)) {
+		dev_err(dev, "ioremap failed for page-0 resource\n");
+		return PTR_ERR(cspmu->base0);
+	}
+
+	/* Base address for page 1 if supported. Otherwise point to page 0. */
+	cspmu->base1 = cspmu->base0;
+	if (CHECK_APMT_FLAG(apmt_node->flags, DUAL_PAGE, SUPP)) {
+		cspmu->base1 = devm_platform_ioremap_resource(pdev, 1);
+		if (IS_ERR(cspmu->base1)) {
+			dev_err(dev, "ioremap failed for page-1 resource\n");
+			return PTR_ERR(cspmu->base1);
+		}
+	}
+
+	cspmu->pmcfgr = readl(cspmu->base0 + PMCFGR);
+
+	cspmu->num_logical_ctrs = FIELD_GET(PMCFGR_N, cspmu->pmcfgr) + 1;
+
+	cspmu->cycle_counter_logical_idx = ARM_CSPMU_MAX_HW_CNTRS;
+
+	if (supports_cycle_counter(cspmu)) {
+		/*
+		 * The last logical counter is mapped to cycle counter if
+		 * there is a gap between regular and cycle counter. Otherwise,
+		 * logical and physical have 1-to-1 mapping.
+		 */
+		cspmu->cycle_counter_logical_idx =
+			(cspmu->num_logical_ctrs <= ARM_CSPMU_CYCLE_CNTR_IDX) ?
+				cspmu->num_logical_ctrs - 1 :
+				ARM_CSPMU_CYCLE_CNTR_IDX;
+	}
+
+	cspmu->num_set_clr_reg =
+		DIV_ROUND_UP(cspmu->num_logical_ctrs,
+				ARM_CSPMU_SET_CLR_COUNTER_NUM);
+
+	cspmu->hw_events.events =
+		devm_kcalloc(dev, cspmu->num_logical_ctrs,
+			     sizeof(*cspmu->hw_events.events), GFP_KERNEL);
+
+	if (!cspmu->hw_events.events)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static inline int arm_cspmu_get_reset_overflow(struct arm_cspmu *cspmu,
+					       u32 *pmovs)
+{
+	int i;
+	u32 pmovclr_offset = PMOVSCLR;
+	u32 has_overflowed = 0;
+
+	for (i = 0; i < cspmu->num_set_clr_reg; ++i) {
+		pmovs[i] = readl(cspmu->base1 + pmovclr_offset);
+		has_overflowed |= pmovs[i];
+		writel(pmovs[i], cspmu->base1 + pmovclr_offset);
+		pmovclr_offset += sizeof(u32);
+	}
+
+	return has_overflowed != 0;
+}
+
+static irqreturn_t arm_cspmu_handle_irq(int irq_num, void *dev)
+{
+	int idx, has_overflowed;
+	struct perf_event *event;
+	struct arm_cspmu *cspmu = dev;
+	DECLARE_BITMAP(pmovs, ARM_CSPMU_MAX_HW_CNTRS);
+	bool handled = false;
+
+	arm_cspmu_stop_counters(cspmu);
+
+	has_overflowed = arm_cspmu_get_reset_overflow(cspmu, (u32 *)pmovs);
+	if (!has_overflowed)
+		goto done;
+
+	for_each_set_bit(idx, cspmu->hw_events.used_ctrs,
+			cspmu->num_logical_ctrs) {
+		event = cspmu->hw_events.events[idx];
+
+		if (!event)
+			continue;
+
+		if (!test_bit(event->hw.idx, pmovs))
+			continue;
+
+		arm_cspmu_event_update(event);
+		arm_cspmu_set_event_period(event);
+
+		handled = true;
+	}
+
+done:
+	arm_cspmu_start_counters(cspmu);
+	return IRQ_RETVAL(handled);
+}
+
+static int arm_cspmu_request_irq(struct arm_cspmu *cspmu)
+{
+	int irq, ret;
+	struct device *dev;
+	struct platform_device *pdev;
+	struct acpi_apmt_node *apmt_node;
+
+	dev = cspmu->dev;
+	pdev = to_platform_device(dev);
+	apmt_node = cspmu->apmt_node;
+
+	/* Skip IRQ request if the PMU does not support overflow interrupt. */
+	if (apmt_node->ovflw_irq == 0)
+		return 0;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	ret = devm_request_irq(dev, irq, arm_cspmu_handle_irq,
+			       IRQF_NOBALANCING | IRQF_NO_THREAD, dev_name(dev),
+			       cspmu);
+	if (ret) {
+		dev_err(dev, "Could not request IRQ %d\n", irq);
+		return ret;
+	}
+
+	cspmu->irq = irq;
+
+	return 0;
+}
+
+static inline int arm_cspmu_find_cpu_container(int cpu, u32 container_uid)
+{
+	u32 acpi_uid;
+	struct device *cpu_dev = get_cpu_device(cpu);
+	struct acpi_device *acpi_dev = ACPI_COMPANION(cpu_dev);
+
+	if (!cpu_dev)
+		return -ENODEV;
+
+	while (acpi_dev) {
+		if (!strcmp(acpi_device_hid(acpi_dev),
+			    ACPI_PROCESSOR_CONTAINER_HID) &&
+		    !kstrtouint(acpi_device_uid(acpi_dev), 0, &acpi_uid) &&
+		    acpi_uid == container_uid)
+			return 0;
+
+		acpi_dev = acpi_dev_parent(acpi_dev);
+	}
+
+	return -ENODEV;
+}
+
+static int arm_cspmu_get_cpus(struct arm_cspmu *cspmu)
+{
+	struct device *dev;
+	struct acpi_apmt_node *apmt_node;
+	int affinity_flag;
+	int cpu;
+
+	dev = cspmu->pmu.dev;
+	apmt_node = cspmu->apmt_node;
+	affinity_flag = apmt_node->flags & ACPI_APMT_FLAGS_AFFINITY;
+
+	if (affinity_flag == ACPI_APMT_FLAGS_AFFINITY_PROC) {
+		for_each_possible_cpu(cpu) {
+			if (apmt_node->proc_affinity ==
+			    get_acpi_id_for_cpu(cpu)) {
+				cpumask_set_cpu(cpu, &cspmu->associated_cpus);
+				break;
+			}
+		}
+	} else {
+		for_each_possible_cpu(cpu) {
+			if (arm_cspmu_find_cpu_container(
+				    cpu, apmt_node->proc_affinity))
+				continue;
+
+			cpumask_set_cpu(cpu, &cspmu->associated_cpus);
+		}
+	}
+
+	if (cpumask_empty(&cspmu->associated_cpus)) {
+		dev_dbg(dev, "No cpu associated with the PMU\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static int arm_cspmu_register_pmu(struct arm_cspmu *cspmu)
+{
+	int ret, capabilities;
+	struct attribute_group **attr_groups;
+
+	attr_groups = arm_cspmu_alloc_attr_group(cspmu);
+	if (!attr_groups)
+		return -ENOMEM;
+
+	ret = cpuhp_state_add_instance(arm_cspmu_cpuhp_state,
+				       &cspmu->cpuhp_node);
+	if (ret)
+		return ret;
+
+	capabilities = PERF_PMU_CAP_NO_EXCLUDE;
+	if (cspmu->irq == 0)
+		capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+
+	cspmu->pmu = (struct pmu){
+		.task_ctx_nr	= perf_invalid_context,
+		.module		= THIS_MODULE,
+		.pmu_enable	= arm_cspmu_enable,
+		.pmu_disable	= arm_cspmu_disable,
+		.event_init	= arm_cspmu_event_init,
+		.add		= arm_cspmu_add,
+		.del		= arm_cspmu_del,
+		.start		= arm_cspmu_start,
+		.stop		= arm_cspmu_stop,
+		.read		= arm_cspmu_read,
+		.attr_groups	= (const struct attribute_group **)attr_groups,
+		.capabilities	= capabilities,
+	};
+
+	/* Hardware counter init */
+	arm_cspmu_stop_counters(cspmu);
+	arm_cspmu_reset_counters(cspmu);
+
+	ret = perf_pmu_register(&cspmu->pmu, cspmu->name, -1);
+	if (ret) {
+		cpuhp_state_remove_instance(arm_cspmu_cpuhp_state,
+					    &cspmu->cpuhp_node);
+	}
+
+	return ret;
+}
+
+static int arm_cspmu_device_probe(struct platform_device *pdev)
+{
+	int ret;
+	struct arm_cspmu *cspmu;
+
+	cspmu = arm_cspmu_alloc(pdev);
+	if (!cspmu)
+		return -ENOMEM;
+
+	ret = arm_cspmu_init_mmio(cspmu);
+	if (ret)
+		return ret;
+
+	ret = arm_cspmu_request_irq(cspmu);
+	if (ret)
+		return ret;
+
+	ret = arm_cspmu_get_cpus(cspmu);
+	if (ret)
+		return ret;
+
+	ret = arm_cspmu_register_pmu(cspmu);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int arm_cspmu_device_remove(struct platform_device *pdev)
+{
+	struct arm_cspmu *cspmu = platform_get_drvdata(pdev);
+
+	perf_pmu_unregister(&cspmu->pmu);
+	cpuhp_state_remove_instance(arm_cspmu_cpuhp_state, &cspmu->cpuhp_node);
+
+	return 0;
+}
+
+static struct platform_driver arm_cspmu_driver = {
+	.driver = {
+			.name = DRVNAME,
+			.suppress_bind_attrs = true,
+		},
+	.probe = arm_cspmu_device_probe,
+	.remove = arm_cspmu_device_remove,
+};
+
+static void arm_cspmu_set_active_cpu(int cpu, struct arm_cspmu *cspmu)
+{
+	cpumask_set_cpu(cpu, &cspmu->active_cpu);
+	WARN_ON(irq_set_affinity(cspmu->irq, &cspmu->active_cpu));
+}
+
+static int arm_cspmu_cpu_online(unsigned int cpu, struct hlist_node *node)
+{
+	struct arm_cspmu *cspmu =
+		hlist_entry_safe(node, struct arm_cspmu, cpuhp_node);
+
+	if (!cpumask_test_cpu(cpu, &cspmu->associated_cpus))
+		return 0;
+
+	/* If the PMU is already managed, there is nothing to do */
+	if (!cpumask_empty(&cspmu->active_cpu))
+		return 0;
+
+	/* Use this CPU for event counting */
+	arm_cspmu_set_active_cpu(cpu, cspmu);
+
+	return 0;
+}
+
+static int arm_cspmu_cpu_teardown(unsigned int cpu, struct hlist_node *node)
+{
+	int dst;
+	struct cpumask online_supported;
+
+	struct arm_cspmu *cspmu =
+		hlist_entry_safe(node, struct arm_cspmu, cpuhp_node);
+
+	/* Nothing to do if this CPU doesn't own the PMU */
+	if (!cpumask_test_and_clear_cpu(cpu, &cspmu->active_cpu))
+		return 0;
+
+	/* Choose a new CPU to migrate ownership of the PMU to */
+	cpumask_and(&online_supported, &cspmu->associated_cpus,
+		    cpu_online_mask);
+	dst = cpumask_any_but(&online_supported, cpu);
+	if (dst >= nr_cpu_ids)
+		return 0;
+
+	/* Use this CPU for event counting */
+	perf_pmu_migrate_context(&cspmu->pmu, cpu, dst);
+	arm_cspmu_set_active_cpu(dst, cspmu);
+
+	return 0;
+}
+
+static int __init arm_cspmu_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+					"perf/arm/cspmu:online",
+					arm_cspmu_cpu_online,
+					arm_cspmu_cpu_teardown);
+	if (ret < 0)
+		return ret;
+	arm_cspmu_cpuhp_state = ret;
+	return platform_driver_register(&arm_cspmu_driver);
+}
+
+static void __exit arm_cspmu_exit(void)
+{
+	platform_driver_unregister(&arm_cspmu_driver);
+	cpuhp_remove_multi_state(arm_cspmu_cpuhp_state);
+}
+
+module_init(arm_cspmu_init);
+module_exit(arm_cspmu_exit);
diff --git a/drivers/perf/arm_cspmu/arm_cspmu.h b/drivers/perf/arm_cspmu/arm_cspmu.h
new file mode 100644
index 000000000000..51323b175a4a
--- /dev/null
+++ b/drivers/perf/arm_cspmu/arm_cspmu.h
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * ARM CoreSight Architecture PMU driver.
+ * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ *
+ */
+
+#ifndef __ARM_CSPMU_H__
+#define __ARM_CSPMU_H__
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/types.h>
+
+#define to_arm_cspmu(p) (container_of(p, struct arm_cspmu, pmu))
+
+#define ARM_CSPMU_EXT_ATTR(_name, _func, _config)			\
+	(&((struct dev_ext_attribute[]){				\
+		{							\
+			.attr = __ATTR(_name, 0444, _func, NULL),	\
+			.var = (void *)_config				\
+		}							\
+	})[0].attr.attr)
+
+#define ARM_CSPMU_FORMAT_ATTR(_name, _config)				\
+	ARM_CSPMU_EXT_ATTR(_name, arm_cspmu_sysfs_format_show, (char *)_config)
+
+#define ARM_CSPMU_EVENT_ATTR(_name, _config)				\
+	PMU_EVENT_ATTR_ID(_name, arm_cspmu_sysfs_event_show, _config)
+
+
+/* Default event id mask */
+#define ARM_CSPMU_EVENT_MASK	GENMASK_ULL(63, 0)
+
+/* Default filter value mask */
+#define ARM_CSPMU_FILTER_MASK	GENMASK_ULL(63, 0)
+
+/* Default event format */
+#define ARM_CSPMU_FORMAT_EVENT_ATTR	\
+	ARM_CSPMU_FORMAT_ATTR(event, "config:0-32")
+
+/* Default filter format */
+#define ARM_CSPMU_FORMAT_FILTER_ATTR	\
+	ARM_CSPMU_FORMAT_ATTR(filter, "config1:0-31")
+
+/*
+ * This is the default event number for cycle count, if supported, since the
+ * ARM Coresight PMU specification does not define a standard event code
+ * for cycle count.
+ */
+#define ARM_CSPMU_EVT_CYCLES_DEFAULT	(0x1ULL << 32)
+
+/*
+ * The ARM Coresight PMU supports up to 256 event counters.
+ * If the counters are larger-than 32-bits, then the PMU includes at
+ * most 128 counters.
+ */
+#define ARM_CSPMU_MAX_HW_CNTRS		256
+
+/* The cycle counter, if implemented, is located at counter[31]. */
+#define ARM_CSPMU_CYCLE_CNTR_IDX	31
+
+/* PMIIDR register field */
+#define ARM_CSPMU_PMIIDR_IMPLEMENTER	GENMASK(11, 0)
+#define ARM_CSPMU_PMIIDR_PRODUCTID	GENMASK(31, 20)
+
+struct arm_cspmu;
+
+/* This tracks the events assigned to each counter in the PMU. */
+struct arm_cspmu_hw_events {
+	/* The events that are active on the PMU for a given logical index. */
+	struct perf_event **events;
+
+	/*
+	 * Each bit indicates a logical counter is being used (or not) for an
+	 * event. If cycle counter is supported and there is a gap between
+	 * regular and cycle counter, the last logical counter is mapped to
+	 * cycle counter. Otherwise, logical and physical have 1-to-1 mapping.
+	 */
+	DECLARE_BITMAP(used_ctrs, ARM_CSPMU_MAX_HW_CNTRS);
+};
+
+/* Contains ops to query vendor/implementer specific attribute. */
+struct arm_cspmu_impl_ops {
+	/* Get event attributes */
+	struct attribute **(*get_event_attrs)(const struct arm_cspmu *cspmu);
+	/* Get format attributes */
+	struct attribute **(*get_format_attrs)(const struct arm_cspmu *cspmu);
+	/* Get string identifier */
+	const char *(*get_identifier)(const struct arm_cspmu *cspmu);
+	/* Get PMU name to register to core perf */
+	const char *(*get_name)(const struct arm_cspmu *cspmu);
+	/* Check if the event corresponds to cycle count event */
+	bool (*is_cycle_counter_event)(const struct perf_event *event);
+	/* Decode event type/id from configs */
+	u32 (*event_type)(const struct perf_event *event);
+	/* Decode filter value from configs */
+	u32 (*event_filter)(const struct perf_event *event);
+	/* Hide/show unsupported events */
+	umode_t (*event_attr_is_visible)(struct kobject *kobj,
+					 struct attribute *attr, int unused);
+};
+
+/* Vendor/implementer descriptor. */
+struct arm_cspmu_impl {
+	u32 pmiidr;
+	struct arm_cspmu_impl_ops ops;
+	void *ctx;
+};
+
+/* Coresight PMU descriptor. */
+struct arm_cspmu {
+	struct pmu pmu;
+	struct device *dev;
+	struct acpi_apmt_node *apmt_node;
+	const char *name;
+	const char *identifier;
+	void __iomem *base0;
+	void __iomem *base1;
+	int irq;
+	cpumask_t associated_cpus;
+	cpumask_t active_cpu;
+	struct hlist_node cpuhp_node;
+
+	u32 pmcfgr;
+	u32 num_logical_ctrs;
+	u32 num_set_clr_reg;
+	int cycle_counter_logical_idx;
+
+	struct arm_cspmu_hw_events hw_events;
+
+	struct arm_cspmu_impl impl;
+};
+
+/* Default function to show event attribute in sysfs. */
+ssize_t arm_cspmu_sysfs_event_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf);
+
+/* Default function to show format attribute in sysfs. */
+ssize_t arm_cspmu_sysfs_format_show(struct device *dev,
+				    struct device_attribute *attr,
+				    char *buf);
+
+#endif /* __ARM_CSPMU_H__ */
-- 
cgit v1.2.3


From 84481be7167eba1957c8718c8b044a47965fdecb Mon Sep 17 00:00:00 2001
From: Besar Wicaksono <bwicaksono@nvidia.com>
Date: Fri, 11 Nov 2022 16:23:29 -0600
Subject: perf: arm_cspmu: Add support for NVIDIA SCF and MCF attribute

Add support for NVIDIA System Cache Fabric (SCF) and Memory Control
Fabric (MCF) PMU attributes for CoreSight PMU implementation in
NVIDIA devices.

Acked-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Besar Wicaksono <bwicaksono@nvidia.com>
Link: https://lore.kernel.org/r/20221111222330.48602-3-bwicaksono@nvidia.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 Documentation/admin-guide/perf/index.rst      |   1 +
 Documentation/admin-guide/perf/nvidia-pmu.rst | 299 +++++++++++++++++++
 drivers/perf/arm_cspmu/Makefile               |   3 +-
 drivers/perf/arm_cspmu/arm_cspmu.c            |   9 +
 drivers/perf/arm_cspmu/nvidia_cspmu.c         | 398 ++++++++++++++++++++++++++
 drivers/perf/arm_cspmu/nvidia_cspmu.h         |  17 ++
 6 files changed, 726 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/admin-guide/perf/nvidia-pmu.rst
 create mode 100644 drivers/perf/arm_cspmu/nvidia_cspmu.c
 create mode 100644 drivers/perf/arm_cspmu/nvidia_cspmu.h

(limited to 'drivers')

diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst
index 793e1970bc05..7ed239408138 100644
--- a/Documentation/admin-guide/perf/index.rst
+++ b/Documentation/admin-guide/perf/index.rst
@@ -19,3 +19,4 @@ Performance monitor support
    arm_dsu_pmu
    thunderx2-pmu
    alibaba_pmu
+   nvidia-pmu
diff --git a/Documentation/admin-guide/perf/nvidia-pmu.rst b/Documentation/admin-guide/perf/nvidia-pmu.rst
new file mode 100644
index 000000000000..2e0d47cfe7ea
--- /dev/null
+++ b/Documentation/admin-guide/perf/nvidia-pmu.rst
@@ -0,0 +1,299 @@
+=========================================================
+NVIDIA Tegra SoC Uncore Performance Monitoring Unit (PMU)
+=========================================================
+
+The NVIDIA Tegra SoC includes various system PMUs to measure key performance
+metrics like memory bandwidth, latency, and utilization:
+
+* Scalable Coherency Fabric (SCF)
+* NVLink-C2C0
+* NVLink-C2C1
+* CNVLink
+* PCIE
+
+PMU Driver
+----------
+
+The PMUs in this document are based on ARM CoreSight PMU Architecture as
+described in document: ARM IHI 0091. Since this is a standard architecture, the
+PMUs are managed by a common driver "arm-cs-arch-pmu". This driver describes
+the available events and configuration of each PMU in sysfs. Please see the
+sections below to get the sysfs path of each PMU. Like other uncore PMU drivers,
+the driver provides "cpumask" sysfs attribute to show the CPU id used to handle
+the PMU event. There is also "associated_cpus" sysfs attribute, which contains a
+list of CPUs associated with the PMU instance.
+
+.. _SCF_PMU_Section:
+
+SCF PMU
+-------
+
+The SCF PMU monitors system level cache events, CPU traffic, and
+strongly-ordered (SO) PCIE write traffic to local/remote memory. Please see
+:ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section` for more info about the PMU
+traffic coverage.
+
+The events and configuration options of this PMU device are described in sysfs,
+see /sys/bus/event_sources/devices/nvidia_scf_pmu_<socket-id>.
+
+Example usage:
+
+* Count event id 0x0 in socket 0::
+
+   perf stat -a -e nvidia_scf_pmu_0/event=0x0/
+
+* Count event id 0x0 in socket 1::
+
+   perf stat -a -e nvidia_scf_pmu_1/event=0x0/
+
+NVLink-C2C0 PMU
+--------------------
+
+The NVLink-C2C0 PMU monitors incoming traffic from a GPU/CPU connected with
+NVLink-C2C (Chip-2-Chip) interconnect. The type of traffic captured by this PMU
+varies dependent on the chip configuration:
+
+* NVIDIA Grace Hopper Superchip: Hopper GPU is connected with Grace SoC.
+
+  In this config, the PMU captures GPU ATS translated or EGM traffic from the GPU.
+
+* NVIDIA Grace CPU Superchip: two Grace CPU SoCs are connected.
+
+  In this config, the PMU captures read and relaxed ordered (RO) writes from
+  PCIE device of the remote SoC.
+
+Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section` for more info about
+the PMU traffic coverage.
+
+The events and configuration options of this PMU device are described in sysfs,
+see /sys/bus/event_sources/devices/nvidia_nvlink_c2c0_pmu_<socket-id>.
+
+Example usage:
+
+* Count event id 0x0 from the GPU/CPU connected with socket 0::
+
+   perf stat -a -e nvidia_nvlink_c2c0_pmu_0/event=0x0/
+
+* Count event id 0x0 from the GPU/CPU connected with socket 1::
+
+   perf stat -a -e nvidia_nvlink_c2c0_pmu_1/event=0x0/
+
+* Count event id 0x0 from the GPU/CPU connected with socket 2::
+
+   perf stat -a -e nvidia_nvlink_c2c0_pmu_2/event=0x0/
+
+* Count event id 0x0 from the GPU/CPU connected with socket 3::
+
+   perf stat -a -e nvidia_nvlink_c2c0_pmu_3/event=0x0/
+
+NVLink-C2C1 PMU
+-------------------
+
+The NVLink-C2C1 PMU monitors incoming traffic from a GPU connected with
+NVLink-C2C (Chip-2-Chip) interconnect. This PMU captures untranslated GPU
+traffic, in contrast with NvLink-C2C0 PMU that captures ATS translated traffic.
+Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section` for more info about
+the PMU traffic coverage.
+
+The events and configuration options of this PMU device are described in sysfs,
+see /sys/bus/event_sources/devices/nvidia_nvlink_c2c1_pmu_<socket-id>.
+
+Example usage:
+
+* Count event id 0x0 from the GPU connected with socket 0::
+
+   perf stat -a -e nvidia_nvlink_c2c1_pmu_0/event=0x0/
+
+* Count event id 0x0 from the GPU connected with socket 1::
+
+   perf stat -a -e nvidia_nvlink_c2c1_pmu_1/event=0x0/
+
+* Count event id 0x0 from the GPU connected with socket 2::
+
+   perf stat -a -e nvidia_nvlink_c2c1_pmu_2/event=0x0/
+
+* Count event id 0x0 from the GPU connected with socket 3::
+
+   perf stat -a -e nvidia_nvlink_c2c1_pmu_3/event=0x0/
+
+CNVLink PMU
+---------------
+
+The CNVLink PMU monitors traffic from GPU and PCIE device on remote sockets
+to local memory. For PCIE traffic, this PMU captures read and relaxed ordered
+(RO) write traffic. Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section`
+for more info about the PMU traffic coverage.
+
+The events and configuration options of this PMU device are described in sysfs,
+see /sys/bus/event_sources/devices/nvidia_cnvlink_pmu_<socket-id>.
+
+Each SoC socket can be connected to one or more sockets via CNVLink. The user can
+use "rem_socket" bitmap parameter to select the remote socket(s) to monitor.
+Each bit represents the socket number, e.g. "rem_socket=0xE" corresponds to
+socket 1 to 3.
+/sys/bus/event_sources/devices/nvidia_cnvlink_pmu_<socket-id>/format/rem_socket
+shows the valid bits that can be set in the "rem_socket" parameter.
+
+The PMU can not distinguish the remote traffic initiator, therefore it does not
+provide filter to select the traffic source to monitor. It reports combined
+traffic from remote GPU and PCIE devices.
+
+Example usage:
+
+* Count event id 0x0 for the traffic from remote socket 1, 2, and 3 to socket 0::
+
+   perf stat -a -e nvidia_cnvlink_pmu_0/event=0x0,rem_socket=0xE/
+
+* Count event id 0x0 for the traffic from remote socket 0, 2, and 3 to socket 1::
+
+   perf stat -a -e nvidia_cnvlink_pmu_1/event=0x0,rem_socket=0xD/
+
+* Count event id 0x0 for the traffic from remote socket 0, 1, and 3 to socket 2::
+
+   perf stat -a -e nvidia_cnvlink_pmu_2/event=0x0,rem_socket=0xB/
+
+* Count event id 0x0 for the traffic from remote socket 0, 1, and 2 to socket 3::
+
+   perf stat -a -e nvidia_cnvlink_pmu_3/event=0x0,rem_socket=0x7/
+
+
+PCIE PMU
+------------
+
+The PCIE PMU monitors all read/write traffic from PCIE root ports to
+local/remote memory. Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section`
+for more info about the PMU traffic coverage.
+
+The events and configuration options of this PMU device are described in sysfs,
+see /sys/bus/event_sources/devices/nvidia_pcie_pmu_<socket-id>.
+
+Each SoC socket can support multiple root ports. The user can use
+"root_port" bitmap parameter to select the port(s) to monitor, i.e.
+"root_port=0xF" corresponds to root port 0 to 3.
+/sys/bus/event_sources/devices/nvidia_pcie_pmu_<socket-id>/format/root_port
+shows the valid bits that can be set in the "root_port" parameter.
+
+Example usage:
+
+* Count event id 0x0 from root port 0 and 1 of socket 0::
+
+   perf stat -a -e nvidia_pcie_pmu_0/event=0x0,root_port=0x3/
+
+* Count event id 0x0 from root port 0 and 1 of socket 1::
+
+   perf stat -a -e nvidia_pcie_pmu_1/event=0x0,root_port=0x3/
+
+.. _NVIDIA_Uncore_PMU_Traffic_Coverage_Section:
+
+Traffic Coverage
+----------------
+
+The PMU traffic coverage may vary dependent on the chip configuration:
+
+* **NVIDIA Grace Hopper Superchip**: Hopper GPU is connected with Grace SoC.
+
+  Example configuration with two Grace SoCs::
+
+   *********************************          *********************************
+   * SOCKET-A                      *          * SOCKET-B                      *
+   *                               *          *                               *
+   *                     ::::::::  *          *  ::::::::                     *
+   *                     : PCIE :  *          *  : PCIE :                     *
+   *                     ::::::::  *          *  ::::::::                     *
+   *                         |     *          *      |                        *
+   *                         |     *          *      |                        *
+   *  :::::::            ::::::::: *          *  :::::::::            ::::::: *
+   *  :     :            :       : *          *  :       :            :     : *
+   *  : GPU :<--NVLink-->: Grace :<---CNVLink--->: Grace :<--NVLink-->: GPU : *
+   *  :     :    C2C     :  SoC  : *          *  :  SoC  :    C2C     :     : *
+   *  :::::::            ::::::::: *          *  :::::::::            ::::::: *
+   *     |                   |     *          *      |                   |    *
+   *     |                   |     *          *      |                   |    *
+   *  &&&&&&&&           &&&&&&&&  *          *   &&&&&&&&           &&&&&&&& *
+   *  & GMEM &           & CMEM &  *          *   & CMEM &           & GMEM & *
+   *  &&&&&&&&           &&&&&&&&  *          *   &&&&&&&&           &&&&&&&& *
+   *                               *          *                               *
+   *********************************          *********************************
+
+   GMEM = GPU Memory (e.g. HBM)
+   CMEM = CPU Memory (e.g. LPDDR5X)
+
+  |
+  | Following table contains traffic coverage of Grace SoC PMU in socket-A:
+
+  ::
+
+   +--------------+-------+-----------+-----------+-----+----------+----------+
+   |              |                        Source                             |
+   +              +-------+-----------+-----------+-----+----------+----------+
+   | Destination  |       |GPU ATS    |GPU Not-ATS|     | Socket-B | Socket-B |
+   |              |PCI R/W|Translated,|Translated | CPU | CPU/PCIE1| GPU/PCIE2|
+   |              |       |EGM        |           |     |          |          |
+   +==============+=======+===========+===========+=====+==========+==========+
+   | Local        | PCIE  |NVLink-C2C0|NVLink-C2C1| SCF | SCF PMU  | CNVLink  |
+   | SYSRAM/CMEM  | PMU   |PMU        |PMU        | PMU |          | PMU      |
+   +--------------+-------+-----------+-----------+-----+----------+----------+
+   | Local GMEM   | PCIE  |    N/A    |NVLink-C2C1| SCF | SCF PMU  | CNVLink  |
+   |              | PMU   |           |PMU        | PMU |          | PMU      |
+   +--------------+-------+-----------+-----------+-----+----------+----------+
+   | Remote       | PCIE  |NVLink-C2C0|NVLink-C2C1| SCF |          |          |
+   | SYSRAM/CMEM  | PMU   |PMU        |PMU        | PMU |   N/A    |   N/A    |
+   | over CNVLink |       |           |           |     |          |          |
+   +--------------+-------+-----------+-----------+-----+----------+----------+
+   | Remote GMEM  | PCIE  |NVLink-C2C0|NVLink-C2C1| SCF |          |          |
+   | over CNVLink | PMU   |PMU        |PMU        | PMU |   N/A    |   N/A    |
+   +--------------+-------+-----------+-----------+-----+----------+----------+
+
+   PCIE1 traffic represents strongly ordered (SO) writes.
+   PCIE2 traffic represents reads and relaxed ordered (RO) writes.
+
+* **NVIDIA Grace CPU Superchip**: two Grace CPU SoCs are connected.
+
+  Example configuration with two Grace SoCs::
+
+   *******************             *******************
+   * SOCKET-A        *             * SOCKET-B        *
+   *                 *             *                 *
+   *    ::::::::     *             *    ::::::::     *
+   *    : PCIE :     *             *    : PCIE :     *
+   *    ::::::::     *             *    ::::::::     *
+   *        |        *             *        |        *
+   *        |        *             *        |        *
+   *    :::::::::    *             *    :::::::::    *
+   *    :       :    *             *    :       :    *
+   *    : Grace :<--------NVLink------->: Grace :    *
+   *    :  SoC  :    *     C2C     *    :  SoC  :    *
+   *    :::::::::    *             *    :::::::::    *
+   *        |        *             *        |        *
+   *        |        *             *        |        *
+   *     &&&&&&&&    *             *     &&&&&&&&    *
+   *     & CMEM &    *             *     & CMEM &    *
+   *     &&&&&&&&    *             *     &&&&&&&&    *
+   *                 *             *                 *
+   *******************             *******************
+
+   GMEM = GPU Memory (e.g. HBM)
+   CMEM = CPU Memory (e.g. LPDDR5X)
+
+  |
+  | Following table contains traffic coverage of Grace SoC PMU in socket-A:
+
+  ::
+
+   +-----------------+-----------+---------+----------+-------------+
+   |                 |                      Source                  |
+   +                 +-----------+---------+----------+-------------+
+   | Destination     |           |         | Socket-B | Socket-B    |
+   |                 |  PCI R/W  |   CPU   | CPU/PCIE1| PCIE2       |
+   |                 |           |         |          |             |
+   +=================+===========+=========+==========+=============+
+   | Local           |  PCIE PMU | SCF PMU | SCF PMU  | NVLink-C2C0 |
+   | SYSRAM/CMEM     |           |         |          | PMU         |
+   +-----------------+-----------+---------+----------+-------------+
+   | Remote          |           |         |          |             |
+   | SYSRAM/CMEM     |  PCIE PMU | SCF PMU |   N/A    |     N/A     |
+   | over NVLink-C2C |           |         |          |             |
+   +-----------------+-----------+---------+----------+-------------+
+
+   PCIE1 traffic represents strongly ordered (SO) writes.
+   PCIE2 traffic represents reads and relaxed ordered (RO) writes.
diff --git a/drivers/perf/arm_cspmu/Makefile b/drivers/perf/arm_cspmu/Makefile
index 9bc76de4002d..641db85c018b 100644
--- a/drivers/perf/arm_cspmu/Makefile
+++ b/drivers/perf/arm_cspmu/Makefile
@@ -3,4 +3,5 @@
 # SPDX-License-Identifier: GPL-2.0
 
 obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += \
-	arm_cspmu.o
+	arm_cspmu.o \
+	nvidia_cspmu.o
diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c
index e8f57aa9f047..e851eeb33f4a 100644
--- a/drivers/perf/arm_cspmu/arm_cspmu.c
+++ b/drivers/perf/arm_cspmu/arm_cspmu.c
@@ -31,6 +31,7 @@
 #include <acpi/processor.h>
 
 #include "arm_cspmu.h"
+#include "nvidia_cspmu.h"
 
 #define PMUNAME "arm_cspmu"
 #define DRVNAME "arm-cs-arch-pmu"
@@ -116,6 +117,9 @@
  */
 #define HILOHI_MAX_POLL	1000
 
+/* JEDEC-assigned JEP106 identification code */
+#define ARM_CSPMU_IMPL_ID_NVIDIA		0x36B
+
 static unsigned long arm_cspmu_cpuhp_state;
 
 /*
@@ -382,6 +386,11 @@ struct impl_match {
 };
 
 static const struct impl_match impl_match[] = {
+	{
+	  .pmiidr = ARM_CSPMU_IMPL_ID_NVIDIA,
+	  .mask = ARM_CSPMU_PMIIDR_IMPLEMENTER,
+	  .impl_init_ops = nv_cspmu_init_ops
+	},
 	{}
 };
 
diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.c b/drivers/perf/arm_cspmu/nvidia_cspmu.c
new file mode 100644
index 000000000000..c795414ec7c7
--- /dev/null
+++ b/drivers/perf/arm_cspmu/nvidia_cspmu.c
@@ -0,0 +1,398 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ *
+ */
+
+/* Support for NVIDIA specific attributes. */
+
+#include <linux/topology.h>
+
+#include "nvidia_cspmu.h"
+
+#define NV_PCIE_PORT_COUNT           10ULL
+#define NV_PCIE_FILTER_ID_MASK       GENMASK_ULL(NV_PCIE_PORT_COUNT - 1, 0)
+
+#define NV_NVL_C2C_PORT_COUNT        2ULL
+#define NV_NVL_C2C_FILTER_ID_MASK    GENMASK_ULL(NV_NVL_C2C_PORT_COUNT - 1, 0)
+
+#define NV_CNVL_PORT_COUNT           4ULL
+#define NV_CNVL_FILTER_ID_MASK       GENMASK_ULL(NV_CNVL_PORT_COUNT - 1, 0)
+
+#define NV_GENERIC_FILTER_ID_MASK    GENMASK_ULL(31, 0)
+
+#define NV_PRODID_MASK               GENMASK(31, 0)
+
+#define NV_FORMAT_NAME_GENERIC	0
+
+#define to_nv_cspmu_ctx(cspmu)	((struct nv_cspmu_ctx *)(cspmu->impl.ctx))
+
+#define NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _num, _suff, _config)	\
+	ARM_CSPMU_EVENT_ATTR(_pref##_num##_suff, _config)
+
+#define NV_CSPMU_EVENT_ATTR_4(_pref, _suff, _config)			\
+	NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _0_, _suff, _config),	\
+	NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _1_, _suff, _config + 1),	\
+	NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _2_, _suff, _config + 2),	\
+	NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _3_, _suff, _config + 3)
+
+struct nv_cspmu_ctx {
+	const char *name;
+	u32 filter_mask;
+	u32 filter_default_val;
+	struct attribute **event_attr;
+	struct attribute **format_attr;
+};
+
+static struct attribute *scf_pmu_event_attrs[] = {
+	ARM_CSPMU_EVENT_ATTR(bus_cycles,			0x1d),
+
+	ARM_CSPMU_EVENT_ATTR(scf_cache_allocate,		0xF0),
+	ARM_CSPMU_EVENT_ATTR(scf_cache_refill,			0xF1),
+	ARM_CSPMU_EVENT_ATTR(scf_cache,				0xF2),
+	ARM_CSPMU_EVENT_ATTR(scf_cache_wb,			0xF3),
+
+	NV_CSPMU_EVENT_ATTR_4(socket, rd_data,			0x101),
+	NV_CSPMU_EVENT_ATTR_4(socket, dl_rsp,			0x105),
+	NV_CSPMU_EVENT_ATTR_4(socket, wb_data,			0x109),
+	NV_CSPMU_EVENT_ATTR_4(socket, ev_rsp,			0x10d),
+	NV_CSPMU_EVENT_ATTR_4(socket, prb_data,			0x111),
+
+	NV_CSPMU_EVENT_ATTR_4(socket, rd_outstanding,		0x115),
+	NV_CSPMU_EVENT_ATTR_4(socket, dl_outstanding,		0x119),
+	NV_CSPMU_EVENT_ATTR_4(socket, wb_outstanding,		0x11d),
+	NV_CSPMU_EVENT_ATTR_4(socket, wr_outstanding,		0x121),
+	NV_CSPMU_EVENT_ATTR_4(socket, ev_outstanding,		0x125),
+	NV_CSPMU_EVENT_ATTR_4(socket, prb_outstanding,		0x129),
+
+	NV_CSPMU_EVENT_ATTR_4(socket, rd_access,		0x12d),
+	NV_CSPMU_EVENT_ATTR_4(socket, dl_access,		0x131),
+	NV_CSPMU_EVENT_ATTR_4(socket, wb_access,		0x135),
+	NV_CSPMU_EVENT_ATTR_4(socket, wr_access,		0x139),
+	NV_CSPMU_EVENT_ATTR_4(socket, ev_access,		0x13d),
+	NV_CSPMU_EVENT_ATTR_4(socket, prb_access,		0x141),
+
+	NV_CSPMU_EVENT_ATTR_4(ocu, gmem_rd_data,		0x145),
+	NV_CSPMU_EVENT_ATTR_4(ocu, gmem_rd_access,		0x149),
+	NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wb_access,		0x14d),
+	NV_CSPMU_EVENT_ATTR_4(ocu, gmem_rd_outstanding,		0x151),
+	NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wr_outstanding,		0x155),
+
+	NV_CSPMU_EVENT_ATTR_4(ocu, rem_rd_data,			0x159),
+	NV_CSPMU_EVENT_ATTR_4(ocu, rem_rd_access,		0x15d),
+	NV_CSPMU_EVENT_ATTR_4(ocu, rem_wb_access,		0x161),
+	NV_CSPMU_EVENT_ATTR_4(ocu, rem_rd_outstanding,		0x165),
+	NV_CSPMU_EVENT_ATTR_4(ocu, rem_wr_outstanding,		0x169),
+
+	ARM_CSPMU_EVENT_ATTR(gmem_rd_data,			0x16d),
+	ARM_CSPMU_EVENT_ATTR(gmem_rd_access,			0x16e),
+	ARM_CSPMU_EVENT_ATTR(gmem_rd_outstanding,		0x16f),
+	ARM_CSPMU_EVENT_ATTR(gmem_dl_rsp,			0x170),
+	ARM_CSPMU_EVENT_ATTR(gmem_dl_access,			0x171),
+	ARM_CSPMU_EVENT_ATTR(gmem_dl_outstanding,		0x172),
+	ARM_CSPMU_EVENT_ATTR(gmem_wb_data,			0x173),
+	ARM_CSPMU_EVENT_ATTR(gmem_wb_access,			0x174),
+	ARM_CSPMU_EVENT_ATTR(gmem_wb_outstanding,		0x175),
+	ARM_CSPMU_EVENT_ATTR(gmem_ev_rsp,			0x176),
+	ARM_CSPMU_EVENT_ATTR(gmem_ev_access,			0x177),
+	ARM_CSPMU_EVENT_ATTR(gmem_ev_outstanding,		0x178),
+	ARM_CSPMU_EVENT_ATTR(gmem_wr_data,			0x179),
+	ARM_CSPMU_EVENT_ATTR(gmem_wr_outstanding,		0x17a),
+	ARM_CSPMU_EVENT_ATTR(gmem_wr_access,			0x17b),
+
+	NV_CSPMU_EVENT_ATTR_4(socket, wr_data,			0x17c),
+
+	NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wr_data,		0x180),
+	NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wb_data,		0x184),
+	NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wr_access,		0x188),
+	NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wb_outstanding,		0x18c),
+
+	NV_CSPMU_EVENT_ATTR_4(ocu, rem_wr_data,			0x190),
+	NV_CSPMU_EVENT_ATTR_4(ocu, rem_wb_data,			0x194),
+	NV_CSPMU_EVENT_ATTR_4(ocu, rem_wr_access,		0x198),
+	NV_CSPMU_EVENT_ATTR_4(ocu, rem_wb_outstanding,		0x19c),
+
+	ARM_CSPMU_EVENT_ATTR(gmem_wr_total_bytes,		0x1a0),
+	ARM_CSPMU_EVENT_ATTR(remote_socket_wr_total_bytes,	0x1a1),
+	ARM_CSPMU_EVENT_ATTR(remote_socket_rd_data,		0x1a2),
+	ARM_CSPMU_EVENT_ATTR(remote_socket_rd_outstanding,	0x1a3),
+	ARM_CSPMU_EVENT_ATTR(remote_socket_rd_access,		0x1a4),
+
+	ARM_CSPMU_EVENT_ATTR(cmem_rd_data,			0x1a5),
+	ARM_CSPMU_EVENT_ATTR(cmem_rd_access,			0x1a6),
+	ARM_CSPMU_EVENT_ATTR(cmem_rd_outstanding,		0x1a7),
+	ARM_CSPMU_EVENT_ATTR(cmem_dl_rsp,			0x1a8),
+	ARM_CSPMU_EVENT_ATTR(cmem_dl_access,			0x1a9),
+	ARM_CSPMU_EVENT_ATTR(cmem_dl_outstanding,		0x1aa),
+	ARM_CSPMU_EVENT_ATTR(cmem_wb_data,			0x1ab),
+	ARM_CSPMU_EVENT_ATTR(cmem_wb_access,			0x1ac),
+	ARM_CSPMU_EVENT_ATTR(cmem_wb_outstanding,		0x1ad),
+	ARM_CSPMU_EVENT_ATTR(cmem_ev_rsp,			0x1ae),
+	ARM_CSPMU_EVENT_ATTR(cmem_ev_access,			0x1af),
+	ARM_CSPMU_EVENT_ATTR(cmem_ev_outstanding,		0x1b0),
+	ARM_CSPMU_EVENT_ATTR(cmem_wr_data,			0x1b1),
+	ARM_CSPMU_EVENT_ATTR(cmem_wr_outstanding,		0x1b2),
+
+	NV_CSPMU_EVENT_ATTR_4(ocu, cmem_rd_data,		0x1b3),
+	NV_CSPMU_EVENT_ATTR_4(ocu, cmem_rd_access,		0x1b7),
+	NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wb_access,		0x1bb),
+	NV_CSPMU_EVENT_ATTR_4(ocu, cmem_rd_outstanding,		0x1bf),
+	NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wr_outstanding,		0x1c3),
+
+	ARM_CSPMU_EVENT_ATTR(ocu_prb_access,			0x1c7),
+	ARM_CSPMU_EVENT_ATTR(ocu_prb_data,			0x1c8),
+	ARM_CSPMU_EVENT_ATTR(ocu_prb_outstanding,		0x1c9),
+
+	ARM_CSPMU_EVENT_ATTR(cmem_wr_access,			0x1ca),
+
+	NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wr_access,		0x1cb),
+	NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wb_data,		0x1cf),
+	NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wr_data,		0x1d3),
+	NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wb_outstanding,		0x1d7),
+
+	ARM_CSPMU_EVENT_ATTR(cmem_wr_total_bytes,		0x1db),
+
+	ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
+	NULL,
+};
+
+static struct attribute *mcf_pmu_event_attrs[] = {
+	ARM_CSPMU_EVENT_ATTR(rd_bytes_loc,			0x0),
+	ARM_CSPMU_EVENT_ATTR(rd_bytes_rem,			0x1),
+	ARM_CSPMU_EVENT_ATTR(wr_bytes_loc,			0x2),
+	ARM_CSPMU_EVENT_ATTR(wr_bytes_rem,			0x3),
+	ARM_CSPMU_EVENT_ATTR(total_bytes_loc,			0x4),
+	ARM_CSPMU_EVENT_ATTR(total_bytes_rem,			0x5),
+	ARM_CSPMU_EVENT_ATTR(rd_req_loc,			0x6),
+	ARM_CSPMU_EVENT_ATTR(rd_req_rem,			0x7),
+	ARM_CSPMU_EVENT_ATTR(wr_req_loc,			0x8),
+	ARM_CSPMU_EVENT_ATTR(wr_req_rem,			0x9),
+	ARM_CSPMU_EVENT_ATTR(total_req_loc,			0xa),
+	ARM_CSPMU_EVENT_ATTR(total_req_rem,			0xb),
+	ARM_CSPMU_EVENT_ATTR(rd_cum_outs_loc,			0xc),
+	ARM_CSPMU_EVENT_ATTR(rd_cum_outs_rem,			0xd),
+	ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
+	NULL,
+};
+
+static struct attribute *generic_pmu_event_attrs[] = {
+	ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
+	NULL,
+};
+
+static struct attribute *scf_pmu_format_attrs[] = {
+	ARM_CSPMU_FORMAT_EVENT_ATTR,
+	NULL,
+};
+
+static struct attribute *pcie_pmu_format_attrs[] = {
+	ARM_CSPMU_FORMAT_EVENT_ATTR,
+	ARM_CSPMU_FORMAT_ATTR(root_port, "config1:0-9"),
+	NULL,
+};
+
+static struct attribute *nvlink_c2c_pmu_format_attrs[] = {
+	ARM_CSPMU_FORMAT_EVENT_ATTR,
+	NULL,
+};
+
+static struct attribute *cnvlink_pmu_format_attrs[] = {
+	ARM_CSPMU_FORMAT_EVENT_ATTR,
+	ARM_CSPMU_FORMAT_ATTR(rem_socket, "config1:0-3"),
+	NULL,
+};
+
+static struct attribute *generic_pmu_format_attrs[] = {
+	ARM_CSPMU_FORMAT_EVENT_ATTR,
+	ARM_CSPMU_FORMAT_FILTER_ATTR,
+	NULL,
+};
+
+static struct attribute **
+nv_cspmu_get_event_attrs(const struct arm_cspmu *cspmu)
+{
+	const struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(cspmu);
+
+	return ctx->event_attr;
+}
+
+static struct attribute **
+nv_cspmu_get_format_attrs(const struct arm_cspmu *cspmu)
+{
+	const struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(cspmu);
+
+	return ctx->format_attr;
+}
+
+static const char *
+nv_cspmu_get_name(const struct arm_cspmu *cspmu)
+{
+	const struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(cspmu);
+
+	return ctx->name;
+}
+
+static u32 nv_cspmu_event_filter(const struct perf_event *event)
+{
+	const struct nv_cspmu_ctx *ctx =
+		to_nv_cspmu_ctx(to_arm_cspmu(event->pmu));
+
+	if (ctx->filter_mask == 0)
+		return ctx->filter_default_val;
+
+	return event->attr.config1 & ctx->filter_mask;
+}
+
+enum nv_cspmu_name_fmt {
+	NAME_FMT_GENERIC,
+	NAME_FMT_SOCKET
+};
+
+struct nv_cspmu_match {
+	u32 prodid;
+	u32 prodid_mask;
+	u64 filter_mask;
+	u32 filter_default_val;
+	const char *name_pattern;
+	enum nv_cspmu_name_fmt name_fmt;
+	struct attribute **event_attr;
+	struct attribute **format_attr;
+};
+
+static const struct nv_cspmu_match nv_cspmu_match[] = {
+	{
+	  .prodid = 0x103,
+	  .prodid_mask = NV_PRODID_MASK,
+	  .filter_mask = NV_PCIE_FILTER_ID_MASK,
+	  .filter_default_val = NV_PCIE_FILTER_ID_MASK,
+	  .name_pattern = "nvidia_pcie_pmu_%u",
+	  .name_fmt = NAME_FMT_SOCKET,
+	  .event_attr = mcf_pmu_event_attrs,
+	  .format_attr = pcie_pmu_format_attrs
+	},
+	{
+	  .prodid = 0x104,
+	  .prodid_mask = NV_PRODID_MASK,
+	  .filter_mask = 0x0,
+	  .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK,
+	  .name_pattern = "nvidia_nvlink_c2c1_pmu_%u",
+	  .name_fmt = NAME_FMT_SOCKET,
+	  .event_attr = mcf_pmu_event_attrs,
+	  .format_attr = nvlink_c2c_pmu_format_attrs
+	},
+	{
+	  .prodid = 0x105,
+	  .prodid_mask = NV_PRODID_MASK,
+	  .filter_mask = 0x0,
+	  .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK,
+	  .name_pattern = "nvidia_nvlink_c2c0_pmu_%u",
+	  .name_fmt = NAME_FMT_SOCKET,
+	  .event_attr = mcf_pmu_event_attrs,
+	  .format_attr = nvlink_c2c_pmu_format_attrs
+	},
+	{
+	  .prodid = 0x106,
+	  .prodid_mask = NV_PRODID_MASK,
+	  .filter_mask = NV_CNVL_FILTER_ID_MASK,
+	  .filter_default_val = NV_CNVL_FILTER_ID_MASK,
+	  .name_pattern = "nvidia_cnvlink_pmu_%u",
+	  .name_fmt = NAME_FMT_SOCKET,
+	  .event_attr = mcf_pmu_event_attrs,
+	  .format_attr = cnvlink_pmu_format_attrs
+	},
+	{
+	  .prodid = 0x2CF,
+	  .prodid_mask = NV_PRODID_MASK,
+	  .filter_mask = 0x0,
+	  .filter_default_val = 0x0,
+	  .name_pattern = "nvidia_scf_pmu_%u",
+	  .name_fmt = NAME_FMT_SOCKET,
+	  .event_attr = scf_pmu_event_attrs,
+	  .format_attr = scf_pmu_format_attrs
+	},
+	{
+	  .prodid = 0,
+	  .prodid_mask = 0,
+	  .filter_mask = NV_GENERIC_FILTER_ID_MASK,
+	  .filter_default_val = NV_GENERIC_FILTER_ID_MASK,
+	  .name_pattern = "nvidia_uncore_pmu_%u",
+	  .name_fmt = NAME_FMT_GENERIC,
+	  .event_attr = generic_pmu_event_attrs,
+	  .format_attr = generic_pmu_format_attrs
+	},
+};
+
+static char *nv_cspmu_format_name(const struct arm_cspmu *cspmu,
+				  const struct nv_cspmu_match *match)
+{
+	char *name;
+	struct device *dev = cspmu->dev;
+
+	static atomic_t pmu_generic_idx = {0};
+
+	switch (match->name_fmt) {
+	case NAME_FMT_SOCKET: {
+		const int cpu = cpumask_first(&cspmu->associated_cpus);
+		const int socket = cpu_to_node(cpu);
+
+		name = devm_kasprintf(dev, GFP_KERNEL, match->name_pattern,
+				       socket);
+		break;
+	}
+	case NAME_FMT_GENERIC:
+		name = devm_kasprintf(dev, GFP_KERNEL, match->name_pattern,
+				       atomic_fetch_inc(&pmu_generic_idx));
+		break;
+	default:
+		name = NULL;
+		break;
+	}
+
+	return name;
+}
+
+int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
+{
+	u32 prodid;
+	struct nv_cspmu_ctx *ctx;
+	struct device *dev = cspmu->dev;
+	struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
+	const struct nv_cspmu_match *match = nv_cspmu_match;
+
+	ctx = devm_kzalloc(dev, sizeof(struct nv_cspmu_ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	prodid = FIELD_GET(ARM_CSPMU_PMIIDR_PRODUCTID, cspmu->impl.pmiidr);
+
+	/* Find matching PMU. */
+	for (; match->prodid; match++) {
+		const u32 prodid_mask = match->prodid_mask;
+
+		if ((match->prodid & prodid_mask) == (prodid & prodid_mask))
+			break;
+	}
+
+	ctx->name		= nv_cspmu_format_name(cspmu, match);
+	ctx->filter_mask	= match->filter_mask;
+	ctx->filter_default_val = match->filter_default_val;
+	ctx->event_attr		= match->event_attr;
+	ctx->format_attr	= match->format_attr;
+
+	cspmu->impl.ctx = ctx;
+
+	/* NVIDIA specific callbacks. */
+	impl_ops->event_filter			= nv_cspmu_event_filter;
+	impl_ops->get_event_attrs		= nv_cspmu_get_event_attrs;
+	impl_ops->get_format_attrs		= nv_cspmu_get_format_attrs;
+	impl_ops->get_name			= nv_cspmu_get_name;
+
+	/* Set others to NULL to use default callback. */
+	impl_ops->event_type			= NULL;
+	impl_ops->event_attr_is_visible		= NULL;
+	impl_ops->get_identifier		= NULL;
+	impl_ops->is_cycle_counter_event	= NULL;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nv_cspmu_init_ops);
diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.h b/drivers/perf/arm_cspmu/nvidia_cspmu.h
new file mode 100644
index 000000000000..71e18f0dc50b
--- /dev/null
+++ b/drivers/perf/arm_cspmu/nvidia_cspmu.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ *
+ */
+
+/* Support for NVIDIA specific attributes. */
+
+#ifndef __NVIDIA_CSPMU_H__
+#define __NVIDIA_CSPMU_H__
+
+#include "arm_cspmu.h"
+
+/* Allocate NVIDIA descriptor. */
+int nv_cspmu_init_ops(struct arm_cspmu *cspmu);
+
+#endif /* __NVIDIA_CSPMU_H__ */
-- 
cgit v1.2.3


From 1830902eb896824ca313a50f3486645c2df21327 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 15 Nov 2022 18:24:03 +0000
Subject: perf: arm_cspmu: Fix modular builds due to missing MODULE_LICENSE()s

Building an arm64 allmodconfig target results in the following failure
from modpost:

  | ERROR: modpost: missing MODULE_LICENSE() in drivers/perf/arm_cspmu/arm_cspmu.o
  | ERROR: modpost: missing MODULE_LICENSE() in drivers/perf/arm_cspmu/nvidia_cspmu.o
  | make[1]: *** [scripts/Makefile.modpost:126: Module.symvers] Error 1
  | make: *** [Makefile:1944: modpost] Error 2

Add the missing MODULE_LICENSE() macros, following the license of the
source files and symbol exports.

Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_cspmu/arm_cspmu.c    | 2 ++
 drivers/perf/arm_cspmu/nvidia_cspmu.c | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'drivers')

diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c
index e851eeb33f4a..e31302ab7e37 100644
--- a/drivers/perf/arm_cspmu/arm_cspmu.c
+++ b/drivers/perf/arm_cspmu/arm_cspmu.c
@@ -1299,3 +1299,5 @@ static void __exit arm_cspmu_exit(void)
 
 module_init(arm_cspmu_init);
 module_exit(arm_cspmu_exit);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.c b/drivers/perf/arm_cspmu/nvidia_cspmu.c
index c795414ec7c7..72ef80caa3c8 100644
--- a/drivers/perf/arm_cspmu/nvidia_cspmu.c
+++ b/drivers/perf/arm_cspmu/nvidia_cspmu.c
@@ -396,3 +396,5 @@ int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
 	return 0;
 }
 EXPORT_SYMBOL_GPL(nv_cspmu_init_ops);
+
+MODULE_LICENSE("GPL v2");
-- 
cgit v1.2.3


From e72dbf9085b56bbc19ff332f82adec1891077637 Mon Sep 17 00:00:00 2001
From: Besar Wicaksono <bwicaksono@nvidia.com>
Date: Wed, 16 Nov 2022 13:04:55 -0600
Subject: perf: arm_cspmu: Fix build failure on x86_64

Building on x86_64 allmodconfig failed:
  | drivers/perf/arm_cspmu/arm_cspmu.c:1114:29: error: implicit
  |    declaration of function 'get_acpi_id_for_cpu'

get_acpi_id_for_cpu is a helper function from ARM64.
Fix by adding ARM64 dependency.

Signed-off-by: Besar Wicaksono <bwicaksono@nvidia.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Link: https://lore.kernel.org/r/20221116190455.55651-1-bwicaksono@nvidia.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_cspmu/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/perf/arm_cspmu/Kconfig b/drivers/perf/arm_cspmu/Kconfig
index 058223bef661..0b316fe69a45 100644
--- a/drivers/perf/arm_cspmu/Kconfig
+++ b/drivers/perf/arm_cspmu/Kconfig
@@ -4,7 +4,7 @@
 
 config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
 	tristate "ARM Coresight Architecture PMU"
-	depends on ACPI
+	depends on ARM64 && ACPI
 	depends on ACPI_APMT || COMPILE_TEST
 	help
 	  Provides support for performance monitoring unit (PMU) devices
-- 
cgit v1.2.3


From a91bbd5c9984a2b15e68aad7a79c2809fbd10fbe Mon Sep 17 00:00:00 2001
From: Besar Wicaksono <bwicaksono@nvidia.com>
Date: Wed, 16 Nov 2022 14:39:52 -0600
Subject: perf: arm_cspmu: Fix module cyclic dependency

Build on arm64 allmodconfig failed with:
  | depmod: ERROR: Cycle detected: arm_cspmu -> nvidia_cspmu -> arm_cspmu
  | depmod: ERROR: Found 2 modules in dependency cycles!

The arm_cspmu.c provides standard functions to operate the PMU and the
vendor code provides vendor specific attributes. Both need to be built as
single kernel module.

Update the makefile to compile sources under arm_cspmu into one module.

Signed-off-by: Besar Wicaksono <bwicaksono@nvidia.com>
Reviewed-and-Tested-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Link: https://lore.kernel.org/r/20221116203952.34168-1-bwicaksono@nvidia.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_cspmu/Makefile | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/perf/arm_cspmu/Makefile b/drivers/perf/arm_cspmu/Makefile
index 641db85c018b..fedb17df982d 100644
--- a/drivers/perf/arm_cspmu/Makefile
+++ b/drivers/perf/arm_cspmu/Makefile
@@ -2,6 +2,5 @@
 #
 # SPDX-License-Identifier: GPL-2.0
 
-obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += \
-	arm_cspmu.o \
-	nvidia_cspmu.o
+obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu_module.o
+arm_cspmu_module-y := arm_cspmu.o nvidia_cspmu.o
-- 
cgit v1.2.3


From 2016e2113d35ba06866961a39e9a9c822f2ffabd Mon Sep 17 00:00:00 2001
From: Jiucheng Xu <jiucheng.xu@amlogic.com>
Date: Mon, 21 Nov 2022 10:15:58 +0800
Subject: perf/amlogic: Add support for Amlogic meson G12 SoC DDR PMU driver

Add support for Amlogic Meson G12 Series SOC - DDR bandwidth PMU driver
framework and interfaces. The PMU can not only monitor the total DDR
bandwidth, but also individual IP module bandwidth.

Signed-off-by: Jiucheng Xu <jiucheng.xu@amlogic.com>
Tested-by: Chris Healy <healych@amazon.com>
Link: https://lore.kernel.org/r/20221121021602.3306998-1-jiucheng.xu@amlogic.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 MAINTAINERS                               |   8 +
 drivers/perf/Kconfig                      |   2 +
 drivers/perf/Makefile                     |   1 +
 drivers/perf/amlogic/Kconfig              |  10 +
 drivers/perf/amlogic/Makefile             |   5 +
 drivers/perf/amlogic/meson_ddr_pmu_core.c | 562 ++++++++++++++++++++++++++++++
 drivers/perf/amlogic/meson_g12_ddr_pmu.c  | 394 +++++++++++++++++++++
 include/soc/amlogic/meson_ddr_pmu.h       |  66 ++++
 8 files changed, 1048 insertions(+)
 create mode 100644 drivers/perf/amlogic/Kconfig
 create mode 100644 drivers/perf/amlogic/Makefile
 create mode 100644 drivers/perf/amlogic/meson_ddr_pmu_core.c
 create mode 100644 drivers/perf/amlogic/meson_g12_ddr_pmu.c
 create mode 100644 include/soc/amlogic/meson_ddr_pmu.h

(limited to 'drivers')

diff --git a/MAINTAINERS b/MAINTAINERS
index 35ce60f6b8ee..86d090ce3525 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1093,6 +1093,14 @@ S:	Maintained
 F:	Documentation/hid/amd-sfh*
 F:	drivers/hid/amd-sfh-hid/
 
+AMLOGIC DDR PMU DRIVER
+M:	Jiucheng Xu <jiucheng.xu@amlogic.com>
+L:	linux-amlogic@lists.infradead.org
+S:	Supported
+W:	http://www.amlogic.com
+F:	drivers/perf/amlogic/
+F:	include/soc/amlogic/
+
 AMPHION VPU CODEC V4L2 DRIVER
 M:	Ming Qian <ming.qian@nxp.com>
 M:	Shijie Qin <shijie.qin@nxp.com>
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index fa87dedf8c92..77043bcdb33c 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -201,4 +201,6 @@ config MARVELL_CN10K_DDR_PMU
 
 source "drivers/perf/arm_cspmu/Kconfig"
 
+source "drivers/perf/amlogic/Kconfig"
+
 endmenu
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 35bb0d979a70..13e45da61100 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -22,3 +22,4 @@ obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o
 obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o
 obj-$(CONFIG_ALIBABA_UNCORE_DRW_PMU) += alibaba_uncore_drw_pmu.o
 obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu/
+obj-$(CONFIG_MESON_DDR_PMU) += amlogic/
diff --git a/drivers/perf/amlogic/Kconfig b/drivers/perf/amlogic/Kconfig
new file mode 100644
index 000000000000..f68db01a7f17
--- /dev/null
+++ b/drivers/perf/amlogic/Kconfig
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config MESON_DDR_PMU
+	tristate "Amlogic DDR Bandwidth Performance Monitor"
+	depends on ARCH_MESON || COMPILE_TEST
+	help
+          Provides support for the DDR performance monitor
+          in Amlogic SoCs, which can give information about
+          memory throughput and other related events. It
+          supports multiple channels to monitor the memory
+          bandwidth simultaneously.
diff --git a/drivers/perf/amlogic/Makefile b/drivers/perf/amlogic/Makefile
new file mode 100644
index 000000000000..d3ab2ac5353b
--- /dev/null
+++ b/drivers/perf/amlogic/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_MESON_DDR_PMU) += meson_ddr_pmu_g12.o
+
+meson_ddr_pmu_g12-y	:= meson_ddr_pmu_core.o meson_g12_ddr_pmu.o
diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c
new file mode 100644
index 000000000000..0ff7c0449ac2
--- /dev/null
+++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c
@@ -0,0 +1,562 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022 Amlogic, Inc. All rights reserved.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/init.h>
+#include <linux/irqreturn.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/printk.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+#include <linux/version.h>
+
+#include <soc/amlogic/meson_ddr_pmu.h>
+
+struct ddr_pmu {
+	struct pmu pmu;
+	struct dmc_info info;
+	struct dmc_counter counters;	/* save counters from hw */
+	bool pmu_enabled;
+	struct device *dev;
+	char *name;
+	struct hlist_node node;
+	enum cpuhp_state cpuhp_state;
+	int cpu;			/* for cpu hotplug */
+};
+
+#define DDR_PERF_DEV_NAME "meson_ddr_bw"
+#define MAX_AXI_PORTS_OF_CHANNEL	4	/* A DMC channel can monitor max 4 axi ports */
+
+#define to_ddr_pmu(p)		container_of(p, struct ddr_pmu, pmu)
+#define dmc_info_to_pmu(p)	container_of(p, struct ddr_pmu, info)
+
+static void dmc_pmu_enable(struct ddr_pmu *pmu)
+{
+	if (!pmu->pmu_enabled)
+		pmu->info.hw_info->enable(&pmu->info);
+
+	pmu->pmu_enabled = true;
+}
+
+static void dmc_pmu_disable(struct ddr_pmu *pmu)
+{
+	if (pmu->pmu_enabled)
+		pmu->info.hw_info->disable(&pmu->info);
+
+	pmu->pmu_enabled = false;
+}
+
+static void meson_ddr_set_axi_filter(struct perf_event *event, u8 axi_id)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	int chann;
+
+	if (event->attr.config > ALL_CHAN_COUNTER_ID &&
+	    event->attr.config < COUNTER_MAX_ID) {
+		chann = event->attr.config - CHAN1_COUNTER_ID;
+
+		pmu->info.hw_info->set_axi_filter(&pmu->info, axi_id, chann);
+	}
+}
+
+static void ddr_cnt_addition(struct dmc_counter *sum,
+			     struct dmc_counter *add1,
+			     struct dmc_counter *add2,
+			     int chann_nr)
+{
+	int i;
+	u64 cnt1, cnt2;
+
+	sum->all_cnt = add1->all_cnt + add2->all_cnt;
+	sum->all_req = add1->all_req + add2->all_req;
+	for (i = 0; i < chann_nr; i++) {
+		cnt1 = add1->channel_cnt[i];
+		cnt2 = add2->channel_cnt[i];
+
+		sum->channel_cnt[i] = cnt1 + cnt2;
+	}
+}
+
+static void meson_ddr_perf_event_update(struct perf_event *event)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	u64 new_raw_count = 0;
+	struct dmc_counter dc = {0}, sum_dc = {0};
+	int idx;
+	int chann_nr = pmu->info.hw_info->chann_nr;
+
+	/* get the remain counters in register. */
+	pmu->info.hw_info->get_counters(&pmu->info, &dc);
+
+	ddr_cnt_addition(&sum_dc, &pmu->counters, &dc, chann_nr);
+
+	switch (event->attr.config) {
+	case ALL_CHAN_COUNTER_ID:
+		new_raw_count = sum_dc.all_cnt;
+		break;
+	case CHAN1_COUNTER_ID:
+	case CHAN2_COUNTER_ID:
+	case CHAN3_COUNTER_ID:
+	case CHAN4_COUNTER_ID:
+	case CHAN5_COUNTER_ID:
+	case CHAN6_COUNTER_ID:
+	case CHAN7_COUNTER_ID:
+	case CHAN8_COUNTER_ID:
+		idx = event->attr.config - CHAN1_COUNTER_ID;
+		new_raw_count = sum_dc.channel_cnt[idx];
+		break;
+	}
+
+	local64_set(&event->count, new_raw_count);
+}
+
+static int meson_ddr_perf_event_init(struct perf_event *event)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	u64 config1 = event->attr.config1;
+	u64 config2 = event->attr.config2;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+		return -EOPNOTSUPP;
+
+	if (event->cpu < 0)
+		return -EOPNOTSUPP;
+
+	/* check if the number of parameters is too much */
+	if (event->attr.config != ALL_CHAN_COUNTER_ID &&
+	    hweight64(config1) + hweight64(config2) > MAX_AXI_PORTS_OF_CHANNEL)
+		return -EOPNOTSUPP;
+
+	event->cpu = pmu->cpu;
+
+	return 0;
+}
+
+static void meson_ddr_perf_event_start(struct perf_event *event, int flags)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+
+	memset(&pmu->counters, 0, sizeof(pmu->counters));
+	dmc_pmu_enable(pmu);
+}
+
+static int meson_ddr_perf_event_add(struct perf_event *event, int flags)
+{
+	u64 config1 = event->attr.config1;
+	u64 config2 = event->attr.config2;
+	int i;
+
+	for_each_set_bit(i, (const unsigned long *)&config1, sizeof(config1))
+		meson_ddr_set_axi_filter(event, i);
+
+	for_each_set_bit(i, (const unsigned long *)&config2, sizeof(config2))
+		meson_ddr_set_axi_filter(event, i + 64);
+
+	if (flags & PERF_EF_START)
+		meson_ddr_perf_event_start(event, flags);
+
+	return 0;
+}
+
+static void meson_ddr_perf_event_stop(struct perf_event *event, int flags)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+
+	if (flags & PERF_EF_UPDATE)
+		meson_ddr_perf_event_update(event);
+
+	dmc_pmu_disable(pmu);
+}
+
+static void meson_ddr_perf_event_del(struct perf_event *event, int flags)
+{
+	meson_ddr_perf_event_stop(event, PERF_EF_UPDATE);
+}
+
+static ssize_t meson_ddr_perf_cpumask_show(struct device *dev,
+					   struct device_attribute *attr,
+					   char *buf)
+{
+	struct ddr_pmu *pmu = dev_get_drvdata(dev);
+
+	return cpumap_print_to_pagebuf(true, buf, cpumask_of(pmu->cpu));
+}
+
+static struct device_attribute meson_ddr_perf_cpumask_attr =
+__ATTR(cpumask, 0444, meson_ddr_perf_cpumask_show, NULL);
+
+static struct attribute *meson_ddr_perf_cpumask_attrs[] = {
+	&meson_ddr_perf_cpumask_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group ddr_perf_cpumask_attr_group = {
+	.attrs = meson_ddr_perf_cpumask_attrs,
+};
+
+static ssize_t
+pmu_event_show(struct device *dev, struct device_attribute *attr,
+	       char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+	return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
+static ssize_t
+event_show_unit(struct device *dev, struct device_attribute *attr,
+		char *page)
+{
+	return sysfs_emit(page, "MB\n");
+}
+
+static ssize_t
+event_show_scale(struct device *dev, struct device_attribute *attr,
+		 char *page)
+{
+	/* one count = 16byte = 1.52587890625e-05 MB */
+	return sysfs_emit(page, "1.52587890625e-05\n");
+}
+
+#define AML_DDR_PMU_EVENT_ATTR(_name, _id)				\
+{									\
+	.attr = __ATTR(_name, 0444, pmu_event_show, NULL),		\
+	.id = _id,							\
+}
+
+#define AML_DDR_PMU_EVENT_UNIT_ATTR(_name)				\
+	__ATTR(_name.unit, 0444, event_show_unit, NULL)
+
+#define AML_DDR_PMU_EVENT_SCALE_ATTR(_name)				\
+	__ATTR(_name.scale, 0444, event_show_scale, NULL)
+
+static struct device_attribute event_unit_attrs[] = {
+	AML_DDR_PMU_EVENT_UNIT_ATTR(total_rw_bytes),
+	AML_DDR_PMU_EVENT_UNIT_ATTR(chan_1_rw_bytes),
+	AML_DDR_PMU_EVENT_UNIT_ATTR(chan_2_rw_bytes),
+	AML_DDR_PMU_EVENT_UNIT_ATTR(chan_3_rw_bytes),
+	AML_DDR_PMU_EVENT_UNIT_ATTR(chan_4_rw_bytes),
+	AML_DDR_PMU_EVENT_UNIT_ATTR(chan_5_rw_bytes),
+	AML_DDR_PMU_EVENT_UNIT_ATTR(chan_6_rw_bytes),
+	AML_DDR_PMU_EVENT_UNIT_ATTR(chan_7_rw_bytes),
+	AML_DDR_PMU_EVENT_UNIT_ATTR(chan_8_rw_bytes),
+};
+
+static struct device_attribute event_scale_attrs[] = {
+	AML_DDR_PMU_EVENT_SCALE_ATTR(total_rw_bytes),
+	AML_DDR_PMU_EVENT_SCALE_ATTR(chan_1_rw_bytes),
+	AML_DDR_PMU_EVENT_SCALE_ATTR(chan_2_rw_bytes),
+	AML_DDR_PMU_EVENT_SCALE_ATTR(chan_3_rw_bytes),
+	AML_DDR_PMU_EVENT_SCALE_ATTR(chan_4_rw_bytes),
+	AML_DDR_PMU_EVENT_SCALE_ATTR(chan_5_rw_bytes),
+	AML_DDR_PMU_EVENT_SCALE_ATTR(chan_6_rw_bytes),
+	AML_DDR_PMU_EVENT_SCALE_ATTR(chan_7_rw_bytes),
+	AML_DDR_PMU_EVENT_SCALE_ATTR(chan_8_rw_bytes),
+};
+
+static struct perf_pmu_events_attr event_attrs[] = {
+	AML_DDR_PMU_EVENT_ATTR(total_rw_bytes, ALL_CHAN_COUNTER_ID),
+	AML_DDR_PMU_EVENT_ATTR(chan_1_rw_bytes, CHAN1_COUNTER_ID),
+	AML_DDR_PMU_EVENT_ATTR(chan_2_rw_bytes, CHAN2_COUNTER_ID),
+	AML_DDR_PMU_EVENT_ATTR(chan_3_rw_bytes, CHAN3_COUNTER_ID),
+	AML_DDR_PMU_EVENT_ATTR(chan_4_rw_bytes, CHAN4_COUNTER_ID),
+	AML_DDR_PMU_EVENT_ATTR(chan_5_rw_bytes, CHAN5_COUNTER_ID),
+	AML_DDR_PMU_EVENT_ATTR(chan_6_rw_bytes, CHAN6_COUNTER_ID),
+	AML_DDR_PMU_EVENT_ATTR(chan_7_rw_bytes, CHAN7_COUNTER_ID),
+	AML_DDR_PMU_EVENT_ATTR(chan_8_rw_bytes, CHAN8_COUNTER_ID),
+};
+
+/* three attrs are combined an event */
+static struct attribute *ddr_perf_events_attrs[COUNTER_MAX_ID * 3];
+
+static struct attribute_group ddr_perf_events_attr_group = {
+	.name = "events",
+	.attrs = ddr_perf_events_attrs,
+};
+
+static umode_t meson_ddr_perf_format_attr_visible(struct kobject *kobj,
+						  struct attribute *attr,
+						  int n)
+{
+	struct pmu *pmu = dev_get_drvdata(kobj_to_dev(kobj));
+	struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
+	const u64 *capability = ddr_pmu->info.hw_info->capability;
+	struct device_attribute *dev_attr;
+	int id;
+	char value[20]; // config1:xxx, 20 is enough
+
+	dev_attr = container_of(attr, struct device_attribute, attr);
+	dev_attr->show(NULL, NULL, value);
+
+	if (sscanf(value, "config1:%d", &id) == 1)
+		return capability[0] & (1ULL << id) ? attr->mode : 0;
+
+	if (sscanf(value, "config2:%d", &id) == 1)
+		return capability[1] & (1ULL << id) ? attr->mode : 0;
+
+	return attr->mode;
+}
+
+static struct attribute_group ddr_perf_format_attr_group = {
+	.name = "format",
+	.is_visible = meson_ddr_perf_format_attr_visible,
+};
+
+static ssize_t meson_ddr_perf_identifier_show(struct device *dev,
+					      struct device_attribute *attr,
+					      char *page)
+{
+	struct ddr_pmu *pmu = dev_get_drvdata(dev);
+
+	return sysfs_emit(page, "%s\n", pmu->name);
+}
+
+static struct device_attribute meson_ddr_perf_identifier_attr =
+__ATTR(identifier, 0444, meson_ddr_perf_identifier_show, NULL);
+
+static struct attribute *meson_ddr_perf_identifier_attrs[] = {
+	&meson_ddr_perf_identifier_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group ddr_perf_identifier_attr_group = {
+	.attrs = meson_ddr_perf_identifier_attrs,
+};
+
+static const struct attribute_group *attr_groups[] = {
+	&ddr_perf_events_attr_group,
+	&ddr_perf_format_attr_group,
+	&ddr_perf_cpumask_attr_group,
+	&ddr_perf_identifier_attr_group,
+	NULL,
+};
+
+static irqreturn_t dmc_irq_handler(int irq, void *dev_id)
+{
+	struct dmc_info *info = dev_id;
+	struct ddr_pmu *pmu;
+	struct dmc_counter counters, *sum_cnter;
+	int i;
+
+	pmu = dmc_info_to_pmu(info);
+
+	if (info->hw_info->irq_handler(info, &counters) != 0)
+		goto out;
+
+	sum_cnter = &pmu->counters;
+	sum_cnter->all_cnt += counters.all_cnt;
+	sum_cnter->all_req += counters.all_req;
+
+	for (i = 0; i < pmu->info.hw_info->chann_nr; i++)
+		sum_cnter->channel_cnt[i] += counters.channel_cnt[i];
+
+	if (pmu->pmu_enabled)
+		/*
+		 * the timer interrupt only supprt
+		 * one shot mode, we have to re-enable
+		 * it in ISR to support continue mode.
+		 */
+		info->hw_info->enable(info);
+
+	dev_dbg(pmu->dev, "counts: %llu %llu %llu, %llu, %llu, %llu\t\t"
+			"sum: %llu %llu %llu, %llu, %llu, %llu\n",
+			counters.all_req,
+			counters.all_cnt,
+			counters.channel_cnt[0],
+			counters.channel_cnt[1],
+			counters.channel_cnt[2],
+			counters.channel_cnt[3],
+
+			pmu->counters.all_req,
+			pmu->counters.all_cnt,
+			pmu->counters.channel_cnt[0],
+			pmu->counters.channel_cnt[1],
+			pmu->counters.channel_cnt[2],
+			pmu->counters.channel_cnt[3]);
+out:
+	return IRQ_HANDLED;
+}
+
+static int ddr_perf_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct ddr_pmu *pmu = hlist_entry_safe(node, struct ddr_pmu, node);
+	int target;
+
+	if (cpu != pmu->cpu)
+		return 0;
+
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+
+	perf_pmu_migrate_context(&pmu->pmu, cpu, target);
+	pmu->cpu = target;
+
+	WARN_ON(irq_set_affinity(pmu->info.irq_num, cpumask_of(pmu->cpu)));
+
+	return 0;
+}
+
+static void fill_event_attr(struct ddr_pmu *pmu)
+{
+	int i, j, k;
+	struct attribute **dst = ddr_perf_events_attrs;
+
+	j = 0;
+	k = 0;
+
+	/* fill ALL_CHAN_COUNTER_ID event */
+	dst[j++] = &event_attrs[k].attr.attr;
+	dst[j++] = &event_unit_attrs[k].attr;
+	dst[j++] = &event_scale_attrs[k].attr;
+
+	k++;
+
+	/* fill each channel event */
+	for (i = 0; i < pmu->info.hw_info->chann_nr; i++, k++) {
+		dst[j++] = &event_attrs[k].attr.attr;
+		dst[j++] = &event_unit_attrs[k].attr;
+		dst[j++] = &event_scale_attrs[k].attr;
+	}
+
+	dst[j] = NULL; /* mark end */
+}
+
+static void fmt_attr_fill(struct attribute **fmt_attr)
+{
+	ddr_perf_format_attr_group.attrs = fmt_attr;
+}
+
+static int ddr_pmu_parse_dt(struct platform_device *pdev,
+			    struct dmc_info *info)
+{
+	void __iomem *base;
+	int i, ret;
+
+	info->hw_info = of_device_get_match_data(&pdev->dev);
+
+	for (i = 0; i < info->hw_info->dmc_nr; i++) {
+		/* resource 0 for ddr register base */
+		base = devm_platform_ioremap_resource(pdev, i);
+		if (IS_ERR(base))
+			return PTR_ERR(base);
+
+		info->ddr_reg[i] = base;
+	}
+
+	/* resource i for pll register base */
+	base = devm_platform_ioremap_resource(pdev, i);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	info->pll_reg = base;
+
+	ret = platform_get_irq(pdev, 0);
+	if (ret < 0)
+		return ret;
+
+	info->irq_num = ret;
+
+	ret = devm_request_irq(&pdev->dev, info->irq_num, dmc_irq_handler,
+			       IRQF_NOBALANCING, dev_name(&pdev->dev),
+			       (void *)info);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+int meson_ddr_pmu_create(struct platform_device *pdev)
+{
+	int ret;
+	char *name;
+	struct ddr_pmu *pmu;
+
+	pmu = devm_kzalloc(&pdev->dev, sizeof(struct ddr_pmu), GFP_KERNEL);
+	if (!pmu)
+		return -ENOMEM;
+
+	*pmu = (struct ddr_pmu) {
+		.pmu = {
+			.module		= THIS_MODULE,
+			.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
+			.task_ctx_nr	= perf_invalid_context,
+			.attr_groups	= attr_groups,
+			.event_init	= meson_ddr_perf_event_init,
+			.add		= meson_ddr_perf_event_add,
+			.del		= meson_ddr_perf_event_del,
+			.start		= meson_ddr_perf_event_start,
+			.stop		= meson_ddr_perf_event_stop,
+			.read		= meson_ddr_perf_event_update,
+		},
+	};
+
+	ret = ddr_pmu_parse_dt(pdev, &pmu->info);
+	if (ret < 0)
+		return ret;
+
+	fmt_attr_fill(pmu->info.hw_info->fmt_attr);
+
+	pmu->cpu = smp_processor_id();
+
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME);
+	if (!name)
+		return -ENOMEM;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, name, NULL,
+				      ddr_perf_offline_cpu);
+	if (ret < 0)
+		return ret;
+
+	pmu->cpuhp_state = ret;
+
+	/* Register the pmu instance for cpu hotplug */
+	ret = cpuhp_state_add_instance_nocalls(pmu->cpuhp_state, &pmu->node);
+	if (ret)
+		goto cpuhp_instance_err;
+
+	fill_event_attr(pmu);
+
+	ret = perf_pmu_register(&pmu->pmu, name, -1);
+	if (ret)
+		goto pmu_register_err;
+
+	pmu->name = name;
+	pmu->dev = &pdev->dev;
+	pmu->pmu_enabled = false;
+
+	platform_set_drvdata(pdev, pmu);
+
+	return 0;
+
+pmu_register_err:
+	cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node);
+
+cpuhp_instance_err:
+	cpuhp_remove_state(pmu->cpuhp_state);
+
+	return ret;
+}
+
+int meson_ddr_pmu_remove(struct platform_device *pdev)
+{
+	struct ddr_pmu *pmu = platform_get_drvdata(pdev);
+
+	perf_pmu_unregister(&pmu->pmu);
+	cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node);
+	cpuhp_remove_state(pmu->cpuhp_state);
+
+	return 0;
+}
diff --git a/drivers/perf/amlogic/meson_g12_ddr_pmu.c b/drivers/perf/amlogic/meson_g12_ddr_pmu.c
new file mode 100644
index 000000000000..c07c34f03cce
--- /dev/null
+++ b/drivers/perf/amlogic/meson_g12_ddr_pmu.c
@@ -0,0 +1,394 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022 Amlogic, Inc. All rights reserved.
+ */
+
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/printk.h>
+#include <linux/types.h>
+#include <linux/version.h>
+
+#include <soc/amlogic/meson_ddr_pmu.h>
+
+#define PORT_MAJOR		32
+#define DEFAULT_XTAL_FREQ	24000000UL
+
+#define DMC_QOS_IRQ		BIT(30)
+
+/* DMC bandwidth monitor register address offset */
+#define DMC_MON_G12_CTRL0		(0x20  << 2)
+#define DMC_MON_G12_CTRL1		(0x21  << 2)
+#define DMC_MON_G12_CTRL2		(0x22  << 2)
+#define DMC_MON_G12_CTRL3		(0x23  << 2)
+#define DMC_MON_G12_CTRL4		(0x24  << 2)
+#define DMC_MON_G12_CTRL5		(0x25  << 2)
+#define DMC_MON_G12_CTRL6		(0x26  << 2)
+#define DMC_MON_G12_CTRL7		(0x27  << 2)
+#define DMC_MON_G12_CTRL8		(0x28  << 2)
+
+#define DMC_MON_G12_ALL_REQ_CNT		(0x29  << 2)
+#define DMC_MON_G12_ALL_GRANT_CNT	(0x2a  << 2)
+#define DMC_MON_G12_ONE_GRANT_CNT	(0x2b  << 2)
+#define DMC_MON_G12_SEC_GRANT_CNT	(0x2c  << 2)
+#define DMC_MON_G12_THD_GRANT_CNT	(0x2d  << 2)
+#define DMC_MON_G12_FOR_GRANT_CNT	(0x2e  << 2)
+#define DMC_MON_G12_TIMER		(0x2f  << 2)
+
+/* Each bit represent a axi line */
+PMU_FORMAT_ATTR(event, "config:0-7");
+PMU_FORMAT_ATTR(arm, "config1:0");
+PMU_FORMAT_ATTR(gpu, "config1:1");
+PMU_FORMAT_ATTR(pcie, "config1:2");
+PMU_FORMAT_ATTR(hdcp, "config1:3");
+PMU_FORMAT_ATTR(hevc_front, "config1:4");
+PMU_FORMAT_ATTR(usb3_0, "config1:6");
+PMU_FORMAT_ATTR(device, "config1:7");
+PMU_FORMAT_ATTR(hevc_back, "config1:8");
+PMU_FORMAT_ATTR(h265enc, "config1:9");
+PMU_FORMAT_ATTR(vpu_read1, "config1:16");
+PMU_FORMAT_ATTR(vpu_read2, "config1:17");
+PMU_FORMAT_ATTR(vpu_read3, "config1:18");
+PMU_FORMAT_ATTR(vpu_write1, "config1:19");
+PMU_FORMAT_ATTR(vpu_write2, "config1:20");
+PMU_FORMAT_ATTR(vdec, "config1:21");
+PMU_FORMAT_ATTR(hcodec, "config1:22");
+PMU_FORMAT_ATTR(ge2d, "config1:23");
+
+PMU_FORMAT_ATTR(spicc1, "config1:32");
+PMU_FORMAT_ATTR(usb0, "config1:33");
+PMU_FORMAT_ATTR(dma, "config1:34");
+PMU_FORMAT_ATTR(arb0, "config1:35");
+PMU_FORMAT_ATTR(sd_emmc_b, "config1:36");
+PMU_FORMAT_ATTR(usb1, "config1:37");
+PMU_FORMAT_ATTR(audio, "config1:38");
+PMU_FORMAT_ATTR(aififo, "config1:39");
+PMU_FORMAT_ATTR(parser, "config1:41");
+PMU_FORMAT_ATTR(ao_cpu, "config1:42");
+PMU_FORMAT_ATTR(sd_emmc_c, "config1:43");
+PMU_FORMAT_ATTR(spicc2, "config1:44");
+PMU_FORMAT_ATTR(ethernet, "config1:45");
+PMU_FORMAT_ATTR(sana, "config1:46");
+
+/* for sm1 and g12b */
+PMU_FORMAT_ATTR(nna, "config1:10");
+
+/* for g12b only */
+PMU_FORMAT_ATTR(gdc, "config1:11");
+PMU_FORMAT_ATTR(mipi_isp, "config1:12");
+PMU_FORMAT_ATTR(arm1, "config1:13");
+PMU_FORMAT_ATTR(sd_emmc_a, "config1:40");
+
+static struct attribute *g12_pmu_format_attrs[] = {
+	&format_attr_event.attr,
+	&format_attr_arm.attr,
+	&format_attr_gpu.attr,
+	&format_attr_nna.attr,
+	&format_attr_gdc.attr,
+	&format_attr_arm1.attr,
+	&format_attr_mipi_isp.attr,
+	&format_attr_sd_emmc_a.attr,
+	&format_attr_pcie.attr,
+	&format_attr_hdcp.attr,
+	&format_attr_hevc_front.attr,
+	&format_attr_usb3_0.attr,
+	&format_attr_device.attr,
+	&format_attr_hevc_back.attr,
+	&format_attr_h265enc.attr,
+	&format_attr_vpu_read1.attr,
+	&format_attr_vpu_read2.attr,
+	&format_attr_vpu_read3.attr,
+	&format_attr_vpu_write1.attr,
+	&format_attr_vpu_write2.attr,
+	&format_attr_vdec.attr,
+	&format_attr_hcodec.attr,
+	&format_attr_ge2d.attr,
+	&format_attr_spicc1.attr,
+	&format_attr_usb0.attr,
+	&format_attr_dma.attr,
+	&format_attr_arb0.attr,
+	&format_attr_sd_emmc_b.attr,
+	&format_attr_usb1.attr,
+	&format_attr_audio.attr,
+	&format_attr_aififo.attr,
+	&format_attr_parser.attr,
+	&format_attr_ao_cpu.attr,
+	&format_attr_sd_emmc_c.attr,
+	&format_attr_spicc2.attr,
+	&format_attr_ethernet.attr,
+	&format_attr_sana.attr,
+	NULL,
+};
+
+/* calculate ddr clock */
+static unsigned long dmc_g12_get_freq_quick(struct dmc_info *info)
+{
+	unsigned int val;
+	unsigned int n, m, od1;
+	unsigned int od_div = 0xfff;
+	unsigned long freq = 0;
+
+	val = readl(info->pll_reg);
+	val = val & 0xfffff;
+	switch ((val >> 16) & 7) {
+	case 0:
+		od_div = 2;
+		break;
+
+	case 1:
+		od_div = 3;
+		break;
+
+	case 2:
+		od_div = 4;
+		break;
+
+	case 3:
+		od_div = 6;
+		break;
+
+	case 4:
+		od_div = 8;
+		break;
+
+	default:
+		break;
+	}
+
+	m = val & 0x1ff;
+	n = ((val >> 10) & 0x1f);
+	od1 = (((val >> 19) & 0x1)) == 1 ? 2 : 1;
+	freq = DEFAULT_XTAL_FREQ / 1000;        /* avoid overflow */
+	if (n)
+		freq = ((((freq * m) / n) >> od1) / od_div) * 1000;
+
+	return freq;
+}
+
+#ifdef DEBUG
+static void g12_dump_reg(struct dmc_info *db)
+{
+	int s = 0, i;
+	unsigned int r;
+
+	for (i = 0; i < 9; i++) {
+		r  = readl(db->ddr_reg[0] + (DMC_MON_G12_CTRL0 + (i << 2)));
+		pr_notice("DMC_MON_CTRL%d:        %08x\n", i, r);
+	}
+	r  = readl(db->ddr_reg[0] + DMC_MON_G12_ALL_REQ_CNT);
+	pr_notice("DMC_MON_ALL_REQ_CNT:  %08x\n", r);
+	r  = readl(db->ddr_reg[0] + DMC_MON_G12_ALL_GRANT_CNT);
+	pr_notice("DMC_MON_ALL_GRANT_CNT:%08x\n", r);
+	r  = readl(db->ddr_reg[0] + DMC_MON_G12_ONE_GRANT_CNT);
+	pr_notice("DMC_MON_ONE_GRANT_CNT:%08x\n", r);
+	r  = readl(db->ddr_reg[0] + DMC_MON_G12_SEC_GRANT_CNT);
+	pr_notice("DMC_MON_SEC_GRANT_CNT:%08x\n", r);
+	r  = readl(db->ddr_reg[0] + DMC_MON_G12_THD_GRANT_CNT);
+	pr_notice("DMC_MON_THD_GRANT_CNT:%08x\n", r);
+	r  = readl(db->ddr_reg[0] + DMC_MON_G12_FOR_GRANT_CNT);
+	pr_notice("DMC_MON_FOR_GRANT_CNT:%08x\n", r);
+	r  = readl(db->ddr_reg[0] + DMC_MON_G12_TIMER);
+	pr_notice("DMC_MON_TIMER:        %08x\n", r);
+}
+#endif
+
+static void dmc_g12_counter_enable(struct dmc_info *info)
+{
+	unsigned int val;
+	unsigned long clock_count = dmc_g12_get_freq_quick(info) / 10; /* 100ms */
+
+	writel(clock_count, info->ddr_reg[0] + DMC_MON_G12_TIMER);
+
+	val = readl(info->ddr_reg[0] + DMC_MON_G12_CTRL0);
+
+	/* enable all channel */
+	val =  BIT(31) |	/* enable bit */
+	       BIT(20) |	/* use timer  */
+	       0x0f;		/* 4 channels */
+
+	writel(val, info->ddr_reg[0] + DMC_MON_G12_CTRL0);
+
+#ifdef DEBUG
+	g12_dump_reg(info);
+#endif
+}
+
+static void dmc_g12_config_fiter(struct dmc_info *info,
+				 int port, int channel)
+{
+	u32 val;
+	u32 rp[MAX_CHANNEL_NUM] = {DMC_MON_G12_CTRL1, DMC_MON_G12_CTRL3,
+					DMC_MON_G12_CTRL5, DMC_MON_G12_CTRL7};
+	u32 rs[MAX_CHANNEL_NUM] = {DMC_MON_G12_CTRL2, DMC_MON_G12_CTRL4,
+					DMC_MON_G12_CTRL6, DMC_MON_G12_CTRL8};
+	int subport = -1;
+
+	/* clear all port mask */
+	if (port < 0) {
+		writel(0, info->ddr_reg[0] + rp[channel]);
+		writel(0, info->ddr_reg[0] + rs[channel]);
+		return;
+	}
+
+	if (port >= PORT_MAJOR)
+		subport = port - PORT_MAJOR;
+
+	if (subport < 0) {
+		val = readl(info->ddr_reg[0] + rp[channel]);
+		val |=  (1 << port);
+		writel(val, info->ddr_reg[0] + rp[channel]);
+		val = 0xffff;
+		writel(val, info->ddr_reg[0] + rs[channel]);
+	} else {
+		val = BIT(23);		/* select device */
+		writel(val, info->ddr_reg[0] + rp[channel]);
+		val = readl(info->ddr_reg[0] + rs[channel]);
+		val |= (1 << subport);
+		writel(val, info->ddr_reg[0] + rs[channel]);
+	}
+}
+
+static void dmc_g12_set_axi_filter(struct dmc_info *info, int axi_id, int channel)
+{
+	if (channel > info->hw_info->chann_nr)
+		return;
+
+	dmc_g12_config_fiter(info, axi_id, channel);
+}
+
+static void dmc_g12_counter_disable(struct dmc_info *info)
+{
+	int i;
+
+	/* clear timer */
+	writel(0, info->ddr_reg[0] + DMC_MON_G12_CTRL0);
+	writel(0, info->ddr_reg[0] + DMC_MON_G12_TIMER);
+
+	writel(0, info->ddr_reg[0] + DMC_MON_G12_ALL_REQ_CNT);
+	writel(0, info->ddr_reg[0] + DMC_MON_G12_ALL_GRANT_CNT);
+	writel(0, info->ddr_reg[0] + DMC_MON_G12_ONE_GRANT_CNT);
+	writel(0, info->ddr_reg[0] + DMC_MON_G12_SEC_GRANT_CNT);
+	writel(0, info->ddr_reg[0] + DMC_MON_G12_THD_GRANT_CNT);
+	writel(0, info->ddr_reg[0] + DMC_MON_G12_FOR_GRANT_CNT);
+
+	/* clear port channel mapping */
+	for (i = 0; i < info->hw_info->chann_nr; i++)
+		dmc_g12_config_fiter(info, -1, i);
+}
+
+static void dmc_g12_get_counters(struct dmc_info *info,
+				 struct dmc_counter *counter)
+{
+	int i;
+	unsigned int reg;
+
+	counter->all_cnt = readl(info->ddr_reg[0] + DMC_MON_G12_ALL_GRANT_CNT);
+	counter->all_req   = readl(info->ddr_reg[0] + DMC_MON_G12_ALL_REQ_CNT);
+
+	for (i = 0; i < info->hw_info->chann_nr; i++) {
+		reg = DMC_MON_G12_ONE_GRANT_CNT + (i << 2);
+		counter->channel_cnt[i] = readl(info->ddr_reg[0] + reg);
+	}
+}
+
+static int dmc_g12_irq_handler(struct dmc_info *info,
+			       struct dmc_counter *counter)
+{
+	unsigned int val;
+	int ret = -EINVAL;
+
+	val = readl(info->ddr_reg[0] + DMC_MON_G12_CTRL0);
+	if (val & DMC_QOS_IRQ) {
+		dmc_g12_get_counters(info, counter);
+		/* clear irq flags */
+		writel(val, info->ddr_reg[0] + DMC_MON_G12_CTRL0);
+		ret = 0;
+	}
+	return ret;
+}
+
+static const struct dmc_hw_info g12a_dmc_info = {
+	.enable		= dmc_g12_counter_enable,
+	.disable	= dmc_g12_counter_disable,
+	.irq_handler	= dmc_g12_irq_handler,
+	.get_counters	= dmc_g12_get_counters,
+	.set_axi_filter	= dmc_g12_set_axi_filter,
+
+	.dmc_nr = 1,
+	.chann_nr = 4,
+	.capability = {0X7EFF00FF03DF, 0},
+	.fmt_attr = g12_pmu_format_attrs,
+};
+
+static const struct dmc_hw_info g12b_dmc_info = {
+	.enable		= dmc_g12_counter_enable,
+	.disable	= dmc_g12_counter_disable,
+	.irq_handler	= dmc_g12_irq_handler,
+	.get_counters	= dmc_g12_get_counters,
+	.set_axi_filter	= dmc_g12_set_axi_filter,
+
+	.dmc_nr = 1,
+	.chann_nr = 4,
+	.capability = {0X7FFF00FF3FDF, 0},
+	.fmt_attr = g12_pmu_format_attrs,
+};
+
+static const struct dmc_hw_info sm1_dmc_info = {
+	.enable		= dmc_g12_counter_enable,
+	.disable	= dmc_g12_counter_disable,
+	.irq_handler	= dmc_g12_irq_handler,
+	.get_counters	= dmc_g12_get_counters,
+	.set_axi_filter	= dmc_g12_set_axi_filter,
+
+	.dmc_nr = 1,
+	.chann_nr = 4,
+	.capability = {0X7EFF00FF07DF, 0},
+	.fmt_attr = g12_pmu_format_attrs,
+};
+
+static int g12_ddr_pmu_probe(struct platform_device *pdev)
+{
+	return meson_ddr_pmu_create(pdev);
+}
+
+static int g12_ddr_pmu_remove(struct platform_device *pdev)
+{
+	meson_ddr_pmu_remove(pdev);
+
+	return 0;
+}
+
+static const struct of_device_id meson_ddr_pmu_dt_match[] = {
+	{
+		.compatible = "amlogic,g12a-ddr-pmu",
+		.data = &g12a_dmc_info,
+	},
+	{
+		.compatible = "amlogic,g12b-ddr-pmu",
+		.data = &g12b_dmc_info,
+	},
+	{
+		.compatible = "amlogic,sm1-ddr-pmu",
+		.data = &sm1_dmc_info,
+	},
+	{}
+};
+
+static struct platform_driver g12_ddr_pmu_driver = {
+	.probe = g12_ddr_pmu_probe,
+	.remove = g12_ddr_pmu_remove,
+
+	.driver = {
+		.name = "meson-g12-ddr-pmu",
+		.of_match_table = meson_ddr_pmu_dt_match,
+	},
+};
+
+module_platform_driver(g12_ddr_pmu_driver);
+MODULE_AUTHOR("Jiucheng Xu");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Amlogic G12 series SoC DDR PMU");
diff --git a/include/soc/amlogic/meson_ddr_pmu.h b/include/soc/amlogic/meson_ddr_pmu.h
new file mode 100644
index 000000000000..4a33e4ab8ada
--- /dev/null
+++ b/include/soc/amlogic/meson_ddr_pmu.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2022 Amlogic, Inc. All rights reserved.
+ */
+
+#ifndef __MESON_DDR_PMU_H__
+#define __MESON_DDR_PMU_H__
+
+#define MAX_CHANNEL_NUM		8
+
+enum {
+	ALL_CHAN_COUNTER_ID,
+	CHAN1_COUNTER_ID,
+	CHAN2_COUNTER_ID,
+	CHAN3_COUNTER_ID,
+	CHAN4_COUNTER_ID,
+	CHAN5_COUNTER_ID,
+	CHAN6_COUNTER_ID,
+	CHAN7_COUNTER_ID,
+	CHAN8_COUNTER_ID,
+	COUNTER_MAX_ID,
+};
+
+struct dmc_info;
+
+struct dmc_counter {
+	u64 all_cnt;	/* The count of all requests come in/out ddr controller */
+	union {
+		u64 all_req;
+		struct {
+			u64 all_idle_cnt;
+			u64 all_16bit_cnt;
+		};
+	};
+	u64 channel_cnt[MAX_CHANNEL_NUM]; /* To save a DMC bandwidth-monitor channel counter */
+};
+
+struct dmc_hw_info {
+	void (*enable)(struct dmc_info *info);
+	void (*disable)(struct dmc_info *info);
+	/* Bind an axi line to a bandwidth-monitor channel */
+	void (*set_axi_filter)(struct dmc_info *info, int axi_id, int chann);
+	int (*irq_handler)(struct dmc_info *info,
+			   struct dmc_counter *counter);
+	void (*get_counters)(struct dmc_info *info,
+			     struct dmc_counter *counter);
+
+	int dmc_nr;			/* The number of dmc controller */
+	int chann_nr;			/* The number of dmc bandwidth monitor channels */
+	struct attribute **fmt_attr;
+	const u64 capability[2];
+};
+
+struct dmc_info {
+	const struct dmc_hw_info *hw_info;
+
+	void __iomem *ddr_reg[4];
+	unsigned long timer_value;	/* Timer value in TIMER register */
+	void __iomem *pll_reg;
+	int irq_num;			/* irq vector number */
+};
+
+int meson_ddr_pmu_create(struct platform_device *pdev);
+int meson_ddr_pmu_remove(struct platform_device *pdev);
+
+#endif /* __MESON_DDR_PMU_H__ */
-- 
cgit v1.2.3


From 7299fdc1cfff0e45e4ca4efd77f250965598b41c Mon Sep 17 00:00:00 2001
From: Jiucheng Xu <jiucheng.xu@amlogic.com>
Date: Tue, 22 Nov 2022 16:40:28 +0800
Subject: perf/amlogic: Fix build error for x86_64 allmodconfig

The driver misses including <linux/io.h>, which causes a compilation
error with x86_64 'allmodconfig':

drivers/perf/amlogic/meson_g12_ddr_pmu.c: In function 'dmc_g12_get_freq_quick':
drivers/perf/amlogic/meson_g12_ddr_pmu.c:135:15: error: implicit declaration of function 'readl' [-Werror=implicit-function-declaration]
  135 |         val = readl(info->pll_reg);
      |               ^~~~~
drivers/perf/amlogic/meson_g12_ddr_pmu.c: In function 'dmc_g12_counter_enable':
drivers/perf/amlogic/meson_g12_ddr_pmu.c:204:9: error: implicit declaration of function 'writel' [-Werror=implicit-function-declaration]
  204 |         writel(clock_count, info->ddr_reg[0] + DMC_MON_G12_TIMER);
      |         ^~~~~~

Add the missing header to fix the build.

Fixes: 2016e2113d35 ("perf/amlogic: Add support for Amlogic meson G12 SoC DDR PMU driver")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Jiucheng Xu <jiucheng.xu@amlogic.com>
Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://lore.kernel.org/r/20221122084028.572494-1-jiucheng.xu@amlogic.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/amlogic/meson_g12_ddr_pmu.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers')

diff --git a/drivers/perf/amlogic/meson_g12_ddr_pmu.c b/drivers/perf/amlogic/meson_g12_ddr_pmu.c
index c07c34f03cce..932802abd18c 100644
--- a/drivers/perf/amlogic/meson_g12_ddr_pmu.c
+++ b/drivers/perf/amlogic/meson_g12_ddr_pmu.c
@@ -4,6 +4,7 @@
  */
 
 #include <linux/err.h>
+#include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-- 
cgit v1.2.3


From ce00d127a6068a0e4e9485424c1fd32564bce326 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Tue, 29 Nov 2022 11:21:08 +0800
Subject: perf/amlogic: Remove unused header inclusions of  <linux/version.h>

According to the "Abaci Robot":

 | ./drivers/perf/amlogic/meson_g12_ddr_pmu.c:15 linux/version.h not needed.
 | ./drivers/perf/amlogic/meson_ddr_pmu_core.c: 19 linux/version.h not needed.

So drop the unnecessary #include directives.

Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=3280
Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=3282
Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Link: https://lore.kernel.org/r/20221129032108.119661-1-jiapeng.chong@linux.alibaba.com
Link: https://lore.kernel.org/r/20221129032108.119661-2-jiapeng.chong@linux.alibaba.com
[will: Squashed patches together, filled out commit message a bit more]
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/amlogic/meson_ddr_pmu_core.c | 1 -
 drivers/perf/amlogic/meson_g12_ddr_pmu.c  | 1 -
 2 files changed, 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c
index 0ff7c0449ac2..b84346dbac2c 100644
--- a/drivers/perf/amlogic/meson_ddr_pmu_core.c
+++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c
@@ -16,7 +16,6 @@
 #include <linux/printk.h>
 #include <linux/sysfs.h>
 #include <linux/types.h>
-#include <linux/version.h>
 
 #include <soc/amlogic/meson_ddr_pmu.h>
 
diff --git a/drivers/perf/amlogic/meson_g12_ddr_pmu.c b/drivers/perf/amlogic/meson_g12_ddr_pmu.c
index 932802abd18c..a78fdb15e26c 100644
--- a/drivers/perf/amlogic/meson_g12_ddr_pmu.c
+++ b/drivers/perf/amlogic/meson_g12_ddr_pmu.c
@@ -12,7 +12,6 @@
 #include <linux/platform_device.h>
 #include <linux/printk.h>
 #include <linux/types.h>
-#include <linux/version.h>
 
 #include <soc/amlogic/meson_ddr_pmu.h>
 
-- 
cgit v1.2.3


From 6b4bb4f38dbfe85247f006f06135ba46450d5bf0 Mon Sep 17 00:00:00 2001
From: Yicong Yang <yangyicong@hisilicon.com>
Date: Thu, 17 Nov 2022 16:41:33 +0800
Subject: drivers/perf: hisi: Fix some event id for hisi-pcie-pmu

Some event id of hisi-pcie-pmu is incorrect, fix them.

Fixes: 8404b0fbc7fb ("drivers/perf: hisi: Add driver for HiSilicon PCIe PMU")
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
Link: https://lore.kernel.org/r/20221117084136.53572-2-yangyicong@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/hisilicon/hisi_pcie_pmu.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c
index 21771708597d..071e63d9a9ac 100644
--- a/drivers/perf/hisilicon/hisi_pcie_pmu.c
+++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c
@@ -693,10 +693,10 @@ static struct attribute *hisi_pcie_pmu_events_attr[] = {
 	HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_cnt, 0x10210),
 	HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_latency, 0x0011),
 	HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_cnt, 0x10011),
-	HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_flux, 0x1005),
-	HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_time, 0x11005),
-	HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_flux, 0x2004),
-	HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_time, 0x12004),
+	HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_flux, 0x0804),
+	HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_time, 0x10804),
+	HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_flux, 0x0405),
+	HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_time, 0x10405),
 	NULL
 };
 
-- 
cgit v1.2.3


From 17d573984d4d5ad73c7cb5edcf2024c585475b0c Mon Sep 17 00:00:00 2001
From: Yicong Yang <yangyicong@hisilicon.com>
Date: Thu, 17 Nov 2022 16:41:36 +0800
Subject: drivers/perf: hisi: Add TLP filter support

The PMU support to filter the TLP when counting the bandwidth with below
options:

- only count the TLP headers
- only count the TLP payloads
- count both TLP headers and payloads

In the current driver it's default to count the TLP payloads only, which
will have an implicity side effects that on the traffic only have header
only TLPs, we'll get no data.

Make this user configuration through "len_mode" parameter and make it
default to count both TLP headers and payloads when user not specified.
Also update the documentation for it.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
Link: https://lore.kernel.org/r/20221117084136.53572-5-yangyicong@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 Documentation/admin-guide/perf/hisi-pcie-pmu.rst | 18 ++++++++++++++++++
 drivers/perf/hisilicon/hisi_pcie_pmu.c           | 14 +++++++++++++-
 2 files changed, 31 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
index 645f08f65429..7e863662e2d4 100644
--- a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
+++ b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
@@ -110,3 +110,21 @@ Filter options
    Example usage of perf::
 
      $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,thr_len=0x4,thr_mode=1/ sleep 5
+
+4. TLP Length filter
+
+   When counting bandwidth, the data can be composed of certain parts of TLP
+   packets. You can specify it through "len_mode":
+
+   - 2'b00: Reserved (Do not use this since the behaviour is undefined)
+   - 2'b01: Bandwidth of TLP payloads
+   - 2'b10: Bandwidth of TLP headers
+   - 2'b11: Bandwidth of both TLP payloads and headers
+
+   For example, "len_mode=2" means only counting the bandwidth of TLP headers
+   and "len_mode=3" means the final bandwidth data is composed of both TLP
+   headers and payloads. Default value if not specified is 2'b11.
+
+   Example usage of perf::
+
+     $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,len_mode=0x1/ sleep 5
diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c
index 071e63d9a9ac..6fee0b6e163b 100644
--- a/drivers/perf/hisilicon/hisi_pcie_pmu.c
+++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c
@@ -47,10 +47,14 @@
 #define HISI_PCIE_EVENT_M		GENMASK_ULL(15, 0)
 #define HISI_PCIE_THR_MODE_M		GENMASK_ULL(27, 27)
 #define HISI_PCIE_THR_M			GENMASK_ULL(31, 28)
+#define HISI_PCIE_LEN_M			GENMASK_ULL(35, 34)
 #define HISI_PCIE_TARGET_M		GENMASK_ULL(52, 36)
 #define HISI_PCIE_TRIG_MODE_M		GENMASK_ULL(53, 53)
 #define HISI_PCIE_TRIG_M		GENMASK_ULL(59, 56)
 
+/* Default config of TLP length mode, will count both TLP headers and payloads */
+#define HISI_PCIE_LEN_M_DEFAULT		3ULL
+
 #define HISI_PCIE_MAX_COUNTERS		8
 #define HISI_PCIE_REG_STEP		8
 #define HISI_PCIE_THR_MAX_VAL		10
@@ -91,6 +95,7 @@ HISI_PCIE_PMU_FILTER_ATTR(thr_len, config1, 3, 0);
 HISI_PCIE_PMU_FILTER_ATTR(thr_mode, config1, 4, 4);
 HISI_PCIE_PMU_FILTER_ATTR(trig_len, config1, 8, 5);
 HISI_PCIE_PMU_FILTER_ATTR(trig_mode, config1, 9, 9);
+HISI_PCIE_PMU_FILTER_ATTR(len_mode, config1, 11, 10);
 HISI_PCIE_PMU_FILTER_ATTR(port, config2, 15, 0);
 HISI_PCIE_PMU_FILTER_ATTR(bdf, config2, 31, 16);
 
@@ -215,8 +220,8 @@ static void hisi_pcie_pmu_config_filter(struct perf_event *event)
 {
 	struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
+	u64 port, trig_len, thr_len, len_mode;
 	u64 reg = HISI_PCIE_INIT_SET;
-	u64 port, trig_len, thr_len;
 
 	/* Config HISI_PCIE_EVENT_CTRL according to event. */
 	reg |= FIELD_PREP(HISI_PCIE_EVENT_M, hisi_pcie_get_real_event(event));
@@ -245,6 +250,12 @@ static void hisi_pcie_pmu_config_filter(struct perf_event *event)
 		reg |= HISI_PCIE_THR_EN;
 	}
 
+	len_mode = hisi_pcie_get_len_mode(event);
+	if (len_mode)
+		reg |= FIELD_PREP(HISI_PCIE_LEN_M, len_mode);
+	else
+		reg |= FIELD_PREP(HISI_PCIE_LEN_M, HISI_PCIE_LEN_M_DEFAULT);
+
 	hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EVENT_CTRL, hwc->idx, reg);
 }
 
@@ -711,6 +722,7 @@ static struct attribute *hisi_pcie_pmu_format_attr[] = {
 	HISI_PCIE_PMU_FORMAT_ATTR(thr_mode, "config1:4"),
 	HISI_PCIE_PMU_FORMAT_ATTR(trig_len, "config1:5-8"),
 	HISI_PCIE_PMU_FORMAT_ATTR(trig_mode, "config1:9"),
+	HISI_PCIE_PMU_FORMAT_ATTR(len_mode, "config1:10-11"),
 	HISI_PCIE_PMU_FORMAT_ATTR(port, "config2:0-15"),
 	HISI_PCIE_PMU_FORMAT_ATTR(bdf, "config2:16-31"),
 	NULL
-- 
cgit v1.2.3


From 229d58e31678dece365060c50a39a99a2b1dc729 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Wed, 16 Nov 2022 17:03:24 +0000
Subject: firmware: arm_ffa: Move constants to header file

FF-A function IDs and error codes will be needed in the hypervisor too,
so move to them to the header file where they can be shared. Rename the
version constants with an "FFA_" prefix so that they are less likely
to clash with other code in the tree.

Co-developed-by: Andrew Walbran <qwandor@google.com>
Signed-off-by: Andrew Walbran <qwandor@google.com>
Signed-off-by: Quentin Perret <qperret@google.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Link: https://lore.kernel.org/r/20221116170335.2341003-2-qperret@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/firmware/arm_ffa/driver.c | 101 ++++----------------------------------
 include/linux/arm_ffa.h           |  83 +++++++++++++++++++++++++++++++
 2 files changed, 93 insertions(+), 91 deletions(-)

(limited to 'drivers')

diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c
index d5e86ef40b89..fa85c64d3ded 100644
--- a/drivers/firmware/arm_ffa/driver.c
+++ b/drivers/firmware/arm_ffa/driver.c
@@ -36,81 +36,6 @@
 #include "common.h"
 
 #define FFA_DRIVER_VERSION	FFA_VERSION_1_0
-
-#define FFA_SMC(calling_convention, func_num)				\
-	ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, (calling_convention),	\
-			   ARM_SMCCC_OWNER_STANDARD, (func_num))
-
-#define FFA_SMC_32(func_num)	FFA_SMC(ARM_SMCCC_SMC_32, (func_num))
-#define FFA_SMC_64(func_num)	FFA_SMC(ARM_SMCCC_SMC_64, (func_num))
-
-#define FFA_ERROR			FFA_SMC_32(0x60)
-#define FFA_SUCCESS			FFA_SMC_32(0x61)
-#define FFA_INTERRUPT			FFA_SMC_32(0x62)
-#define FFA_VERSION			FFA_SMC_32(0x63)
-#define FFA_FEATURES			FFA_SMC_32(0x64)
-#define FFA_RX_RELEASE			FFA_SMC_32(0x65)
-#define FFA_RXTX_MAP			FFA_SMC_32(0x66)
-#define FFA_FN64_RXTX_MAP		FFA_SMC_64(0x66)
-#define FFA_RXTX_UNMAP			FFA_SMC_32(0x67)
-#define FFA_PARTITION_INFO_GET		FFA_SMC_32(0x68)
-#define FFA_ID_GET			FFA_SMC_32(0x69)
-#define FFA_MSG_POLL			FFA_SMC_32(0x6A)
-#define FFA_MSG_WAIT			FFA_SMC_32(0x6B)
-#define FFA_YIELD			FFA_SMC_32(0x6C)
-#define FFA_RUN				FFA_SMC_32(0x6D)
-#define FFA_MSG_SEND			FFA_SMC_32(0x6E)
-#define FFA_MSG_SEND_DIRECT_REQ		FFA_SMC_32(0x6F)
-#define FFA_FN64_MSG_SEND_DIRECT_REQ	FFA_SMC_64(0x6F)
-#define FFA_MSG_SEND_DIRECT_RESP	FFA_SMC_32(0x70)
-#define FFA_FN64_MSG_SEND_DIRECT_RESP	FFA_SMC_64(0x70)
-#define FFA_MEM_DONATE			FFA_SMC_32(0x71)
-#define FFA_FN64_MEM_DONATE		FFA_SMC_64(0x71)
-#define FFA_MEM_LEND			FFA_SMC_32(0x72)
-#define FFA_FN64_MEM_LEND		FFA_SMC_64(0x72)
-#define FFA_MEM_SHARE			FFA_SMC_32(0x73)
-#define FFA_FN64_MEM_SHARE		FFA_SMC_64(0x73)
-#define FFA_MEM_RETRIEVE_REQ		FFA_SMC_32(0x74)
-#define FFA_FN64_MEM_RETRIEVE_REQ	FFA_SMC_64(0x74)
-#define FFA_MEM_RETRIEVE_RESP		FFA_SMC_32(0x75)
-#define FFA_MEM_RELINQUISH		FFA_SMC_32(0x76)
-#define FFA_MEM_RECLAIM			FFA_SMC_32(0x77)
-#define FFA_MEM_OP_PAUSE		FFA_SMC_32(0x78)
-#define FFA_MEM_OP_RESUME		FFA_SMC_32(0x79)
-#define FFA_MEM_FRAG_RX			FFA_SMC_32(0x7A)
-#define FFA_MEM_FRAG_TX			FFA_SMC_32(0x7B)
-#define FFA_NORMAL_WORLD_RESUME		FFA_SMC_32(0x7C)
-
-/*
- * For some calls it is necessary to use SMC64 to pass or return 64-bit values.
- * For such calls FFA_FN_NATIVE(name) will choose the appropriate
- * (native-width) function ID.
- */
-#ifdef CONFIG_64BIT
-#define FFA_FN_NATIVE(name)	FFA_FN64_##name
-#else
-#define FFA_FN_NATIVE(name)	FFA_##name
-#endif
-
-/* FFA error codes. */
-#define FFA_RET_SUCCESS            (0)
-#define FFA_RET_NOT_SUPPORTED      (-1)
-#define FFA_RET_INVALID_PARAMETERS (-2)
-#define FFA_RET_NO_MEMORY          (-3)
-#define FFA_RET_BUSY               (-4)
-#define FFA_RET_INTERRUPTED        (-5)
-#define FFA_RET_DENIED             (-6)
-#define FFA_RET_RETRY              (-7)
-#define FFA_RET_ABORTED            (-8)
-
-#define MAJOR_VERSION_MASK	GENMASK(30, 16)
-#define MINOR_VERSION_MASK	GENMASK(15, 0)
-#define MAJOR_VERSION(x)	((u16)(FIELD_GET(MAJOR_VERSION_MASK, (x))))
-#define MINOR_VERSION(x)	((u16)(FIELD_GET(MINOR_VERSION_MASK, (x))))
-#define PACK_VERSION_INFO(major, minor)			\
-	(FIELD_PREP(MAJOR_VERSION_MASK, (major)) |	\
-	 FIELD_PREP(MINOR_VERSION_MASK, (minor)))
-#define FFA_VERSION_1_0		PACK_VERSION_INFO(1, 0)
 #define FFA_MIN_VERSION		FFA_VERSION_1_0
 
 #define SENDER_ID_MASK		GENMASK(31, 16)
@@ -120,12 +45,6 @@
 #define PACK_TARGET_INFO(s, r)		\
 	(FIELD_PREP(SENDER_ID_MASK, (s)) | FIELD_PREP(RECEIVER_ID_MASK, (r)))
 
-/*
- * FF-A specification mentions explicitly about '4K pages'. This should
- * not be confused with the kernel PAGE_SIZE, which is the translation
- * granule kernel is configured and may be one among 4K, 16K and 64K.
- */
-#define FFA_PAGE_SIZE		SZ_4K
 /*
  * Keeping RX TX buffer size as 4K for now
  * 64K may be preferred to keep it min a page in 64K PAGE_SIZE config
@@ -178,9 +97,9 @@ static struct ffa_drv_info *drv_info;
  */
 static u32 ffa_compatible_version_find(u32 version)
 {
-	u16 major = MAJOR_VERSION(version), minor = MINOR_VERSION(version);
-	u16 drv_major = MAJOR_VERSION(FFA_DRIVER_VERSION);
-	u16 drv_minor = MINOR_VERSION(FFA_DRIVER_VERSION);
+	u16 major = FFA_MAJOR_VERSION(version), minor = FFA_MINOR_VERSION(version);
+	u16 drv_major = FFA_MAJOR_VERSION(FFA_DRIVER_VERSION);
+	u16 drv_minor = FFA_MINOR_VERSION(FFA_DRIVER_VERSION);
 
 	if ((major < drv_major) || (major == drv_major && minor <= drv_minor))
 		return version;
@@ -204,16 +123,16 @@ static int ffa_version_check(u32 *version)
 
 	if (ver.a0 < FFA_MIN_VERSION) {
 		pr_err("Incompatible v%d.%d! Earliest supported v%d.%d\n",
-		       MAJOR_VERSION(ver.a0), MINOR_VERSION(ver.a0),
-		       MAJOR_VERSION(FFA_MIN_VERSION),
-		       MINOR_VERSION(FFA_MIN_VERSION));
+		       FFA_MAJOR_VERSION(ver.a0), FFA_MINOR_VERSION(ver.a0),
+		       FFA_MAJOR_VERSION(FFA_MIN_VERSION),
+		       FFA_MINOR_VERSION(FFA_MIN_VERSION));
 		return -EINVAL;
 	}
 
-	pr_info("Driver version %d.%d\n", MAJOR_VERSION(FFA_DRIVER_VERSION),
-		MINOR_VERSION(FFA_DRIVER_VERSION));
-	pr_info("Firmware version %d.%d found\n", MAJOR_VERSION(ver.a0),
-		MINOR_VERSION(ver.a0));
+	pr_info("Driver version %d.%d\n", FFA_MAJOR_VERSION(FFA_DRIVER_VERSION),
+		FFA_MINOR_VERSION(FFA_DRIVER_VERSION));
+	pr_info("Firmware version %d.%d found\n", FFA_MAJOR_VERSION(ver.a0),
+		FFA_MINOR_VERSION(ver.a0));
 	*version = ffa_compatible_version_find(ver.a0);
 
 	return 0;
diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h
index 5f02d2e6b9d9..daff44d777fa 100644
--- a/include/linux/arm_ffa.h
+++ b/include/linux/arm_ffa.h
@@ -11,6 +11,89 @@
 #include <linux/types.h>
 #include <linux/uuid.h>
 
+#define FFA_SMC(calling_convention, func_num)				\
+	ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, (calling_convention),	\
+			   ARM_SMCCC_OWNER_STANDARD, (func_num))
+
+#define FFA_SMC_32(func_num)	FFA_SMC(ARM_SMCCC_SMC_32, (func_num))
+#define FFA_SMC_64(func_num)	FFA_SMC(ARM_SMCCC_SMC_64, (func_num))
+
+#define FFA_ERROR			FFA_SMC_32(0x60)
+#define FFA_SUCCESS			FFA_SMC_32(0x61)
+#define FFA_INTERRUPT			FFA_SMC_32(0x62)
+#define FFA_VERSION			FFA_SMC_32(0x63)
+#define FFA_FEATURES			FFA_SMC_32(0x64)
+#define FFA_RX_RELEASE			FFA_SMC_32(0x65)
+#define FFA_RXTX_MAP			FFA_SMC_32(0x66)
+#define FFA_FN64_RXTX_MAP		FFA_SMC_64(0x66)
+#define FFA_RXTX_UNMAP			FFA_SMC_32(0x67)
+#define FFA_PARTITION_INFO_GET		FFA_SMC_32(0x68)
+#define FFA_ID_GET			FFA_SMC_32(0x69)
+#define FFA_MSG_POLL			FFA_SMC_32(0x6A)
+#define FFA_MSG_WAIT			FFA_SMC_32(0x6B)
+#define FFA_YIELD			FFA_SMC_32(0x6C)
+#define FFA_RUN				FFA_SMC_32(0x6D)
+#define FFA_MSG_SEND			FFA_SMC_32(0x6E)
+#define FFA_MSG_SEND_DIRECT_REQ		FFA_SMC_32(0x6F)
+#define FFA_FN64_MSG_SEND_DIRECT_REQ	FFA_SMC_64(0x6F)
+#define FFA_MSG_SEND_DIRECT_RESP	FFA_SMC_32(0x70)
+#define FFA_FN64_MSG_SEND_DIRECT_RESP	FFA_SMC_64(0x70)
+#define FFA_MEM_DONATE			FFA_SMC_32(0x71)
+#define FFA_FN64_MEM_DONATE		FFA_SMC_64(0x71)
+#define FFA_MEM_LEND			FFA_SMC_32(0x72)
+#define FFA_FN64_MEM_LEND		FFA_SMC_64(0x72)
+#define FFA_MEM_SHARE			FFA_SMC_32(0x73)
+#define FFA_FN64_MEM_SHARE		FFA_SMC_64(0x73)
+#define FFA_MEM_RETRIEVE_REQ		FFA_SMC_32(0x74)
+#define FFA_FN64_MEM_RETRIEVE_REQ	FFA_SMC_64(0x74)
+#define FFA_MEM_RETRIEVE_RESP		FFA_SMC_32(0x75)
+#define FFA_MEM_RELINQUISH		FFA_SMC_32(0x76)
+#define FFA_MEM_RECLAIM			FFA_SMC_32(0x77)
+#define FFA_MEM_OP_PAUSE		FFA_SMC_32(0x78)
+#define FFA_MEM_OP_RESUME		FFA_SMC_32(0x79)
+#define FFA_MEM_FRAG_RX			FFA_SMC_32(0x7A)
+#define FFA_MEM_FRAG_TX			FFA_SMC_32(0x7B)
+#define FFA_NORMAL_WORLD_RESUME		FFA_SMC_32(0x7C)
+
+/*
+ * For some calls it is necessary to use SMC64 to pass or return 64-bit values.
+ * For such calls FFA_FN_NATIVE(name) will choose the appropriate
+ * (native-width) function ID.
+ */
+#ifdef CONFIG_64BIT
+#define FFA_FN_NATIVE(name)	FFA_FN64_##name
+#else
+#define FFA_FN_NATIVE(name)	FFA_##name
+#endif
+
+/* FFA error codes. */
+#define FFA_RET_SUCCESS            (0)
+#define FFA_RET_NOT_SUPPORTED      (-1)
+#define FFA_RET_INVALID_PARAMETERS (-2)
+#define FFA_RET_NO_MEMORY          (-3)
+#define FFA_RET_BUSY               (-4)
+#define FFA_RET_INTERRUPTED        (-5)
+#define FFA_RET_DENIED             (-6)
+#define FFA_RET_RETRY              (-7)
+#define FFA_RET_ABORTED            (-8)
+
+/* FFA version encoding */
+#define FFA_MAJOR_VERSION_MASK	GENMASK(30, 16)
+#define FFA_MINOR_VERSION_MASK	GENMASK(15, 0)
+#define FFA_MAJOR_VERSION(x)	((u16)(FIELD_GET(FFA_MAJOR_VERSION_MASK, (x))))
+#define FFA_MINOR_VERSION(x)	((u16)(FIELD_GET(FFA_MINOR_VERSION_MASK, (x))))
+#define FFA_PACK_VERSION_INFO(major, minor)			\
+	(FIELD_PREP(FFA_MAJOR_VERSION_MASK, (major)) |		\
+	 FIELD_PREP(FFA_MINOR_VERSION_MASK, (minor)))
+#define FFA_VERSION_1_0		FFA_PACK_VERSION_INFO(1, 0)
+
+/**
+ * FF-A specification mentions explicitly about '4K pages'. This should
+ * not be confused with the kernel PAGE_SIZE, which is the translation
+ * granule kernel is configured and may be one among 4K, 16K and 64K.
+ */
+#define FFA_PAGE_SIZE		SZ_4K
+
 /* FFA Bus/Device/Driver related */
 struct ffa_device {
 	int vm_id;
-- 
cgit v1.2.3


From 4361251cef466839795691e2628285e3f5093a98 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Fri, 2 Dec 2022 07:26:11 +0530
Subject: arm_pmu: Drop redundant armpmu->map_event() in armpmu_event_init()

__hw_perf_event_init() already calls armpmu->map_event() callback, and also
returns its error code including -ENOENT, along with a debug callout. Hence
an additional armpmu->map_event() check for -ENOENT is redundant.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Link: https://lore.kernel.org/r/20221202015611.338499-1-anshuman.khandual@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_pmu.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 3f07df5a7e95..ed89748b1612 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -514,9 +514,6 @@ static int armpmu_event_init(struct perf_event *event)
 	if (has_branch_stack(event))
 		return -EOPNOTSUPP;
 
-	if (armpmu->map_event(event) == -ENOENT)
-		return -ENOENT;
-
 	return __hw_perf_event_init(event);
 }
 
-- 
cgit v1.2.3