From 9a66d36cc7ace8062bd703d1edfb99437a2ddf2b Mon Sep 17 00:00:00 2001
From: Frank Li <frank.li@nxp.com>
Date: Wed, 1 May 2019 18:43:29 +0000
Subject: drivers/perf: imx_ddr: Add DDR performance counter support to perf

Add DDR performance monitor support for iMX8QXP. The PMU consists of 3
programmable event counters and a single dedicated cycle counter.

Example usage:

 $ perf stat -a -e \
   imx8_ddr0/read-cycles/,imx8_ddr0/write-cycles/,imx8_ddr0/precharge/ ls

- or -

 $ perf stat -a -e \
   imx8_ddr0/cycles/,imx8_ddr0/read-access/,imx8_ddr0/write-access/ ls

Other events are supported, and advertised via perf list.

Reviewed-by: Andrey Smirnov <andrew.smirnov@gmail.com>
Signed-off-by: Frank Li <Frank.Li@nxp.com>
[will: rewrote commit message/kconfig and used #defines for dev/cpuhp names]
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/perf/Kconfig             |   8 +
 drivers/perf/Makefile            |   1 +
 drivers/perf/fsl_imx8_ddr_perf.c | 554 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 563 insertions(+)
 create mode 100644 drivers/perf/fsl_imx8_ddr_perf.c

(limited to 'drivers')

diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index e4221a107dca..09ae8a970880 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -71,6 +71,14 @@ config ARM_DSU_PMU
 	  system, control logic. The PMU allows counting various events related
 	  to DSU.
 
+config FSL_IMX8_DDR_PMU
+	tristate "Freescale i.MX8 DDR perf monitor"
+	depends on ARCH_MXC
+	  help
+	  Provides support for the DDR performance monitor in i.MX8, which
+	  can give information about memory throughput and other related
+	  events.
+
 config HISI_PMU
        bool "HiSilicon SoC PMU"
        depends on ARM64 && ACPI
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 30489941f3d6..2ebb4de17815 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -5,6 +5,7 @@ obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o
 obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o
 obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
 obj-$(CONFIG_ARM_SMMU_V3_PMU) += arm_smmuv3_pmu.o
+obj-$(CONFIG_FSL_IMX8_DDR_PMU) += fsl_imx8_ddr_perf.o
 obj-$(CONFIG_HISI_PMU) += hisilicon/
 obj-$(CONFIG_QCOM_L2_PMU)	+= qcom_l2_pmu.o
 obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
new file mode 100644
index 000000000000..63fe21600072
--- /dev/null
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -0,0 +1,554 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2017 NXP
+ * Copyright 2016 Freescale Semiconductor, Inc.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+
+#define COUNTER_CNTL		0x0
+#define COUNTER_READ		0x20
+
+#define COUNTER_DPCR1		0x30
+
+#define CNTL_OVER		0x1
+#define CNTL_CLEAR		0x2
+#define CNTL_EN			0x4
+#define CNTL_EN_MASK		0xFFFFFFFB
+#define CNTL_CLEAR_MASK		0xFFFFFFFD
+#define CNTL_OVER_MASK		0xFFFFFFFE
+
+#define CNTL_CSV_SHIFT		24
+#define CNTL_CSV_MASK		(0xFF << CNTL_CSV_SHIFT)
+
+#define EVENT_CYCLES_ID		0
+#define EVENT_CYCLES_COUNTER	0
+#define NUM_COUNTERS		4
+
+#define to_ddr_pmu(p)		container_of(p, struct ddr_pmu, pmu)
+
+#define DDR_PERF_DEV_NAME	"imx8_ddr"
+#define DDR_CPUHP_CB_NAME	DDR_PERF_DEV_NAME "_perf_pmu"
+
+static DEFINE_IDA(ddr_ida);
+
+static const struct of_device_id imx_ddr_pmu_dt_ids[] = {
+	{ .compatible = "fsl,imx8-ddr-pmu",},
+	{ .compatible = "fsl,imx8m-ddr-pmu",},
+	{ /* sentinel */ }
+};
+
+struct ddr_pmu {
+	struct pmu pmu;
+	void __iomem *base;
+	unsigned int cpu;
+	struct	hlist_node node;
+	struct	device *dev;
+	struct perf_event *events[NUM_COUNTERS];
+	int active_events;
+	enum cpuhp_state cpuhp_state;
+	int irq;
+	int id;
+};
+
+static ssize_t ddr_perf_cpumask_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct ddr_pmu *pmu = dev_get_drvdata(dev);
+
+	return cpumap_print_to_pagebuf(true, buf, cpumask_of(pmu->cpu));
+}
+
+static struct device_attribute ddr_perf_cpumask_attr =
+	__ATTR(cpumask, 0444, ddr_perf_cpumask_show, NULL);
+
+static struct attribute *ddr_perf_cpumask_attrs[] = {
+	&ddr_perf_cpumask_attr.attr,
+	NULL,
+};
+
+static struct attribute_group ddr_perf_cpumask_attr_group = {
+	.attrs = ddr_perf_cpumask_attrs,
+};
+
+static ssize_t
+ddr_pmu_event_show(struct device *dev, struct device_attribute *attr,
+		   char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+	return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
+#define IMX8_DDR_PMU_EVENT_ATTR(_name, _id)				\
+	(&((struct perf_pmu_events_attr[]) {				\
+		{ .attr = __ATTR(_name, 0444, ddr_pmu_event_show, NULL),\
+		  .id = _id, }						\
+	})[0].attr.attr)
+
+static struct attribute *ddr_perf_events_attrs[] = {
+	IMX8_DDR_PMU_EVENT_ATTR(cycles, EVENT_CYCLES_ID),
+	IMX8_DDR_PMU_EVENT_ATTR(selfresh, 0x01),
+	IMX8_DDR_PMU_EVENT_ATTR(read-accesses, 0x04),
+	IMX8_DDR_PMU_EVENT_ATTR(write-accesses, 0x05),
+	IMX8_DDR_PMU_EVENT_ATTR(read-queue-depth, 0x08),
+	IMX8_DDR_PMU_EVENT_ATTR(write-queue-depth, 0x09),
+	IMX8_DDR_PMU_EVENT_ATTR(lp-read-credit-cnt, 0x10),
+	IMX8_DDR_PMU_EVENT_ATTR(hp-read-credit-cnt, 0x11),
+	IMX8_DDR_PMU_EVENT_ATTR(write-credit-cnt, 0x12),
+	IMX8_DDR_PMU_EVENT_ATTR(read-command, 0x20),
+	IMX8_DDR_PMU_EVENT_ATTR(write-command, 0x21),
+	IMX8_DDR_PMU_EVENT_ATTR(read-modify-write-command, 0x22),
+	IMX8_DDR_PMU_EVENT_ATTR(hp-read, 0x23),
+	IMX8_DDR_PMU_EVENT_ATTR(hp-req-nocredit, 0x24),
+	IMX8_DDR_PMU_EVENT_ATTR(hp-xact-credit, 0x25),
+	IMX8_DDR_PMU_EVENT_ATTR(lp-req-nocredit, 0x26),
+	IMX8_DDR_PMU_EVENT_ATTR(lp-xact-credit, 0x27),
+	IMX8_DDR_PMU_EVENT_ATTR(wr-xact-credit, 0x29),
+	IMX8_DDR_PMU_EVENT_ATTR(read-cycles, 0x2a),
+	IMX8_DDR_PMU_EVENT_ATTR(write-cycles, 0x2b),
+	IMX8_DDR_PMU_EVENT_ATTR(read-write-transition, 0x30),
+	IMX8_DDR_PMU_EVENT_ATTR(precharge, 0x31),
+	IMX8_DDR_PMU_EVENT_ATTR(activate, 0x32),
+	IMX8_DDR_PMU_EVENT_ATTR(load-mode, 0x33),
+	IMX8_DDR_PMU_EVENT_ATTR(perf-mwr, 0x34),
+	IMX8_DDR_PMU_EVENT_ATTR(read, 0x35),
+	IMX8_DDR_PMU_EVENT_ATTR(read-activate, 0x36),
+	IMX8_DDR_PMU_EVENT_ATTR(refresh, 0x37),
+	IMX8_DDR_PMU_EVENT_ATTR(write, 0x38),
+	IMX8_DDR_PMU_EVENT_ATTR(raw-hazard, 0x39),
+	NULL,
+};
+
+static struct attribute_group ddr_perf_events_attr_group = {
+	.name = "events",
+	.attrs = ddr_perf_events_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+
+static struct attribute *ddr_perf_format_attrs[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute_group ddr_perf_format_attr_group = {
+	.name = "format",
+	.attrs = ddr_perf_format_attrs,
+};
+
+static const struct attribute_group *attr_groups[] = {
+	&ddr_perf_events_attr_group,
+	&ddr_perf_format_attr_group,
+	&ddr_perf_cpumask_attr_group,
+	NULL,
+};
+
+static u32 ddr_perf_alloc_counter(struct ddr_pmu *pmu, int event)
+{
+	int i;
+
+	/*
+	 * Always map cycle event to counter 0
+	 * Cycles counter is dedicated for cycle event
+	 * can't used for the other events
+	 */
+	if (event == EVENT_CYCLES_ID) {
+		if (pmu->events[EVENT_CYCLES_COUNTER] == NULL)
+			return EVENT_CYCLES_COUNTER;
+		else
+			return -ENOENT;
+	}
+
+	for (i = 1; i < NUM_COUNTERS; i++) {
+		if (pmu->events[i] == NULL)
+			return i;
+	}
+
+	return -ENOENT;
+}
+
+static void ddr_perf_free_counter(struct ddr_pmu *pmu, int counter)
+{
+	pmu->events[counter] = NULL;
+}
+
+static u32 ddr_perf_read_counter(struct ddr_pmu *pmu, int counter)
+{
+	return readl_relaxed(pmu->base + COUNTER_READ + counter * 4);
+}
+
+static int ddr_perf_event_init(struct perf_event *event)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_event *sibling;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+		return -EOPNOTSUPP;
+
+	if (event->cpu < 0) {
+		dev_warn(pmu->dev, "Can't provide per-task data!\n");
+		return -EOPNOTSUPP;
+	}
+
+	/*
+	 * We must NOT create groups containing mixed PMUs, although software
+	 * events are acceptable (for example to create a CCN group
+	 * periodically read when a hrtimer aka cpu-clock leader triggers).
+	 */
+	if (event->group_leader->pmu != event->pmu &&
+			!is_software_event(event->group_leader))
+		return -EINVAL;
+
+	for_each_sibling_event(sibling, event->group_leader) {
+		if (sibling->pmu != event->pmu &&
+				!is_software_event(sibling))
+			return -EINVAL;
+	}
+
+	event->cpu = pmu->cpu;
+	hwc->idx = -1;
+
+	return 0;
+}
+
+
+static void ddr_perf_event_update(struct perf_event *event)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 delta, prev_raw_count, new_raw_count;
+	int counter = hwc->idx;
+
+	do {
+		prev_raw_count = local64_read(&hwc->prev_count);
+		new_raw_count = ddr_perf_read_counter(pmu, counter);
+	} while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+			new_raw_count) != prev_raw_count);
+
+	delta = (new_raw_count - prev_raw_count) & 0xFFFFFFFF;
+
+	local64_add(delta, &event->count);
+}
+
+static void ddr_perf_counter_enable(struct ddr_pmu *pmu, int config,
+				  int counter, bool enable)
+{
+	u8 reg = counter * 4 + COUNTER_CNTL;
+	int val;
+
+	if (enable) {
+		/*
+		 * must disable first, then enable again
+		 * otherwise, cycle counter will not work
+		 * if previous state is enabled.
+		 */
+		writel(0, pmu->base + reg);
+		val = CNTL_EN | CNTL_CLEAR;
+		val |= FIELD_PREP(CNTL_CSV_MASK, config);
+		writel(val, pmu->base + reg);
+	} else {
+		/* Disable counter */
+		writel(0, pmu->base + reg);
+	}
+}
+
+static void ddr_perf_event_start(struct perf_event *event, int flags)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+
+	local64_set(&hwc->prev_count, 0);
+
+	ddr_perf_counter_enable(pmu, event->attr.config, counter, true);
+
+	hwc->state = 0;
+}
+
+static int ddr_perf_event_add(struct perf_event *event, int flags)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int counter;
+	int cfg = event->attr.config;
+
+	counter = ddr_perf_alloc_counter(pmu, cfg);
+	if (counter < 0) {
+		dev_dbg(pmu->dev, "There are not enough counters\n");
+		return -EOPNOTSUPP;
+	}
+
+	pmu->events[counter] = event;
+	pmu->active_events++;
+	hwc->idx = counter;
+
+	hwc->state |= PERF_HES_STOPPED;
+
+	if (flags & PERF_EF_START)
+		ddr_perf_event_start(event, flags);
+
+	return 0;
+}
+
+static void ddr_perf_event_stop(struct perf_event *event, int flags)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+
+	ddr_perf_counter_enable(pmu, event->attr.config, counter, false);
+	ddr_perf_event_update(event);
+
+	hwc->state |= PERF_HES_STOPPED;
+}
+
+static void ddr_perf_event_del(struct perf_event *event, int flags)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+
+	ddr_perf_event_stop(event, PERF_EF_UPDATE);
+
+	ddr_perf_free_counter(pmu, counter);
+	pmu->active_events--;
+	hwc->idx = -1;
+}
+
+static void ddr_perf_pmu_enable(struct pmu *pmu)
+{
+	struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
+
+	/* enable cycle counter if cycle is not active event list */
+	if (ddr_pmu->events[EVENT_CYCLES_COUNTER] == NULL)
+		ddr_perf_counter_enable(ddr_pmu,
+				      EVENT_CYCLES_ID,
+				      EVENT_CYCLES_COUNTER,
+				      true);
+}
+
+static void ddr_perf_pmu_disable(struct pmu *pmu)
+{
+	struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
+
+	if (ddr_pmu->events[EVENT_CYCLES_COUNTER] == NULL)
+		ddr_perf_counter_enable(ddr_pmu,
+				      EVENT_CYCLES_ID,
+				      EVENT_CYCLES_COUNTER,
+				      false);
+}
+
+static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base,
+			 struct device *dev)
+{
+	*pmu = (struct ddr_pmu) {
+		.pmu = (struct pmu) {
+			.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+			.task_ctx_nr = perf_invalid_context,
+			.attr_groups = attr_groups,
+			.event_init  = ddr_perf_event_init,
+			.add	     = ddr_perf_event_add,
+			.del	     = ddr_perf_event_del,
+			.start	     = ddr_perf_event_start,
+			.stop	     = ddr_perf_event_stop,
+			.read	     = ddr_perf_event_update,
+			.pmu_enable  = ddr_perf_pmu_enable,
+			.pmu_disable = ddr_perf_pmu_disable,
+		},
+		.base = base,
+		.dev = dev,
+	};
+
+	pmu->id = ida_simple_get(&ddr_ida, 0, 0, GFP_KERNEL);
+	return pmu->id;
+}
+
+static irqreturn_t ddr_perf_irq_handler(int irq, void *p)
+{
+	int i;
+	struct ddr_pmu *pmu = (struct ddr_pmu *) p;
+	struct perf_event *event, *cycle_event = NULL;
+
+	/* all counter will stop if cycle counter disabled */
+	ddr_perf_counter_enable(pmu,
+			      EVENT_CYCLES_ID,
+			      EVENT_CYCLES_COUNTER,
+			      false);
+	/*
+	 * When the cycle counter overflows, all counters are stopped,
+	 * and an IRQ is raised. If any other counter overflows, it
+	 * continues counting, and no IRQ is raised.
+	 *
+	 * Cycles occur at least 4 times as often as other events, so we
+	 * can update all events on a cycle counter overflow and not
+	 * lose events.
+	 *
+	 */
+	for (i = 0; i < NUM_COUNTERS; i++) {
+
+		if (!pmu->events[i])
+			continue;
+
+		event = pmu->events[i];
+
+		ddr_perf_event_update(event);
+
+		if (event->hw.idx == EVENT_CYCLES_COUNTER)
+			cycle_event = event;
+	}
+
+	ddr_perf_counter_enable(pmu,
+			      EVENT_CYCLES_ID,
+			      EVENT_CYCLES_COUNTER,
+			      true);
+	if (cycle_event)
+		ddr_perf_event_update(cycle_event);
+
+	return IRQ_HANDLED;
+}
+
+static int ddr_perf_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct ddr_pmu *pmu = hlist_entry_safe(node, struct ddr_pmu, node);
+	int target;
+
+	if (cpu != pmu->cpu)
+		return 0;
+
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+
+	perf_pmu_migrate_context(&pmu->pmu, cpu, target);
+	pmu->cpu = target;
+
+	WARN_ON(irq_set_affinity_hint(pmu->irq, cpumask_of(pmu->cpu)));
+
+	return 0;
+}
+
+static int ddr_perf_probe(struct platform_device *pdev)
+{
+	struct ddr_pmu *pmu;
+	struct device_node *np;
+	void __iomem *base;
+	char *name;
+	int num;
+	int ret;
+	int irq;
+
+	base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	np = pdev->dev.of_node;
+
+	pmu = devm_kzalloc(&pdev->dev, sizeof(*pmu), GFP_KERNEL);
+	if (!pmu)
+		return -ENOMEM;
+
+	num = ddr_perf_init(pmu, base, &pdev->dev);
+
+	platform_set_drvdata(pdev, pmu);
+
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME "%d",
+			      num);
+	if (!name)
+		return -ENOMEM;
+
+	pmu->cpu = raw_smp_processor_id();
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+				      DDR_CPUHP_CB_NAME,
+				      NULL,
+				      ddr_perf_offline_cpu);
+
+	if (ret < 0) {
+		dev_err(&pdev->dev, "cpuhp_setup_state_multi failed\n");
+		goto ddr_perf_err;
+	}
+
+	pmu->cpuhp_state = ret;
+
+	/* Register the pmu instance for cpu hotplug */
+	cpuhp_state_add_instance_nocalls(pmu->cpuhp_state, &pmu->node);
+
+	/* Request irq */
+	irq = of_irq_get(np, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev, "Failed to get irq: %d", irq);
+		ret = irq;
+		goto ddr_perf_err;
+	}
+
+	ret = devm_request_irq(&pdev->dev, irq,
+					ddr_perf_irq_handler,
+					IRQF_NOBALANCING | IRQF_NO_THREAD,
+					DDR_CPUHP_CB_NAME,
+					pmu);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Request irq failed: %d", ret);
+		goto ddr_perf_err;
+	}
+
+	pmu->irq = irq;
+	ret = irq_set_affinity_hint(pmu->irq, cpumask_of(pmu->cpu));
+	if (ret) {
+		dev_err(pmu->dev, "Failed to set interrupt affinity!\n");
+		goto ddr_perf_err;
+	}
+
+	ret = perf_pmu_register(&pmu->pmu, name, -1);
+	if (ret)
+		goto ddr_perf_err;
+
+	return 0;
+
+ddr_perf_err:
+	if (pmu->cpuhp_state)
+		cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node);
+
+	ida_simple_remove(&ddr_ida, pmu->id);
+	dev_warn(&pdev->dev, "i.MX8 DDR Perf PMU failed (%d), disabled\n", ret);
+	return ret;
+}
+
+static int ddr_perf_remove(struct platform_device *pdev)
+{
+	struct ddr_pmu *pmu = platform_get_drvdata(pdev);
+
+	cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node);
+	irq_set_affinity_hint(pmu->irq, NULL);
+
+	perf_pmu_unregister(&pmu->pmu);
+
+	ida_simple_remove(&ddr_ida, pmu->id);
+	return 0;
+}
+
+static struct platform_driver imx_ddr_pmu_driver = {
+	.driver         = {
+		.name   = "imx-ddr-pmu",
+		.of_match_table = imx_ddr_pmu_dt_ids,
+	},
+	.probe          = ddr_perf_probe,
+	.remove         = ddr_perf_remove,
+};
+
+module_platform_driver(imx_ddr_pmu_driver);
+MODULE_LICENSE("GPL v2");
-- 
cgit v1.2.3


From ed2b664fcc8073c09394393756df3fc86977bbac Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com>
Date: Wed, 26 Jun 2019 16:37:15 -0500
Subject: ACPI/PPTT: Modify node flag detection to find last IDENTICAL

The ACPI specification implies that the IDENTICAL flag should be
set on all non leaf nodes where the children are identical.
This means that we need to be searching for the last node with
the identical flag set rather than the first one.

Since this flag is also dependent on the table revision, we
need to add a bit of extra code to verify the table revision,
and the next node's state in the traversal. Since we want to
avoid function pointers here, lets just special case
the IDENTICAL flag.

Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Hanjun Guo <hanjun.guo@linaro.org>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/acpi/pptt.c | 35 +++++++++++++++++++++++++++++------
 1 file changed, 29 insertions(+), 6 deletions(-)

(limited to 'drivers')

diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c
index b72e6afaa8fb..05344413f199 100644
--- a/drivers/acpi/pptt.c
+++ b/drivers/acpi/pptt.c
@@ -432,17 +432,40 @@ static void cache_setup_acpi_cpu(struct acpi_table_header *table,
 	}
 }
 
+static bool flag_identical(struct acpi_table_header *table_hdr,
+			   struct acpi_pptt_processor *cpu)
+{
+	struct acpi_pptt_processor *next;
+
+	/* heterogeneous machines must use PPTT revision > 1 */
+	if (table_hdr->revision < 2)
+		return false;
+
+	/* Locate the last node in the tree with IDENTICAL set */
+	if (cpu->flags & ACPI_PPTT_ACPI_IDENTICAL) {
+		next = fetch_pptt_node(table_hdr, cpu->parent);
+		if (!(next && next->flags & ACPI_PPTT_ACPI_IDENTICAL))
+			return true;
+	}
+
+	return false;
+}
+
 /* Passing level values greater than this will result in search termination */
 #define PPTT_ABORT_PACKAGE 0xFF
 
-static struct acpi_pptt_processor *acpi_find_processor_package_id(struct acpi_table_header *table_hdr,
-								  struct acpi_pptt_processor *cpu,
-								  int level, int flag)
+static struct acpi_pptt_processor *acpi_find_processor_tag(struct acpi_table_header *table_hdr,
+							   struct acpi_pptt_processor *cpu,
+							   int level, int flag)
 {
 	struct acpi_pptt_processor *prev_node;
 
 	while (cpu && level) {
-		if (cpu->flags & flag)
+		/* special case the identical flag to find last identical */
+		if (flag == ACPI_PPTT_ACPI_IDENTICAL) {
+			if (flag_identical(table_hdr, cpu))
+				break;
+		} else if (cpu->flags & flag)
 			break;
 		pr_debug("level %d\n", level);
 		prev_node = fetch_pptt_node(table_hdr, cpu->parent);
@@ -480,8 +503,8 @@ static int topology_get_acpi_cpu_tag(struct acpi_table_header *table,
 
 	cpu_node = acpi_find_processor_node(table, acpi_cpu_id);
 	if (cpu_node) {
-		cpu_node = acpi_find_processor_package_id(table, cpu_node,
-							  level, flag);
+		cpu_node = acpi_find_processor_tag(table, cpu_node,
+						   level, flag);
 		/*
 		 * As per specification if the processor structure represents
 		 * an actual processor, then ACPI processor ID must be valid.
-- 
cgit v1.2.3


From 56855a99f3d0d1e9f1f4e24f5851f9bf14c83296 Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com>
Date: Wed, 26 Jun 2019 16:37:16 -0500
Subject: ACPI/PPTT: Add function to return ACPI 6.3 Identical tokens

ACPI 6.3 adds a flag to indicate that child nodes are all
identical cores. This is useful to authoritatively determine
if a set of (possibly offline) cores are identical or not.

Since the flag doesn't give us a unique id we can generate
one and use it to create bitmaps of sibling nodes, or simply
in a loop to determine if a subset of cores are identical.

Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Hanjun Guo <hanjun.guo@linaro.org>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/acpi/pptt.c  | 26 ++++++++++++++++++++++++++
 include/linux/acpi.h |  5 +++++
 2 files changed, 31 insertions(+)

(limited to 'drivers')

diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c
index 05344413f199..1e7ac0bd0d3a 100644
--- a/drivers/acpi/pptt.c
+++ b/drivers/acpi/pptt.c
@@ -683,3 +683,29 @@ int find_acpi_cpu_topology_package(unsigned int cpu)
 	return find_acpi_cpu_topology_tag(cpu, PPTT_ABORT_PACKAGE,
 					  ACPI_PPTT_PHYSICAL_PACKAGE);
 }
+
+/**
+ * find_acpi_cpu_topology_hetero_id() - Get a core architecture tag
+ * @cpu: Kernel logical CPU number
+ *
+ * Determine a unique heterogeneous tag for the given CPU. CPUs with the same
+ * implementation should have matching tags.
+ *
+ * The returned tag can be used to group peers with identical implementation.
+ *
+ * The search terminates when a level is found with the identical implementation
+ * flag set or we reach a root node.
+ *
+ * Due to limitations in the PPTT data structure, there may be rare situations
+ * where two cores in a heterogeneous machine may be identical, but won't have
+ * the same tag.
+ *
+ * Return: -ENOENT if the PPTT doesn't exist, or the CPU cannot be found.
+ * Otherwise returns a value which represents a group of identical cores
+ * similar to this CPU.
+ */
+int find_acpi_cpu_topology_hetero_id(unsigned int cpu)
+{
+	return find_acpi_cpu_topology_tag(cpu, PPTT_ABORT_PACKAGE,
+					  ACPI_PPTT_ACPI_IDENTICAL);
+}
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index d315d86844e4..5bcd23e5ccd6 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1303,6 +1303,7 @@ static inline int lpit_read_residency_count_address(u64 *address)
 #ifdef CONFIG_ACPI_PPTT
 int find_acpi_cpu_topology(unsigned int cpu, int level);
 int find_acpi_cpu_topology_package(unsigned int cpu);
+int find_acpi_cpu_topology_hetero_id(unsigned int cpu);
 int find_acpi_cpu_cache_topology(unsigned int cpu, int level);
 #else
 static inline int find_acpi_cpu_topology(unsigned int cpu, int level)
@@ -1313,6 +1314,10 @@ static inline int find_acpi_cpu_topology_package(unsigned int cpu)
 {
 	return -EINVAL;
 }
+static inline int find_acpi_cpu_topology_hetero_id(unsigned int cpu)
+{
+	return -EINVAL;
+}
 static inline int find_acpi_cpu_cache_topology(unsigned int cpu, int level)
 {
 	return -EINVAL;
-- 
cgit v1.2.3


From d24a0c7099b32b6981d7f126c45348e381718350 Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com>
Date: Wed, 26 Jun 2019 16:37:17 -0500
Subject: arm_pmu: acpi: spe: Add initial MADT/SPE probing

ACPI 6.3 adds additional fields to the MADT GICC
structure to describe SPE PPI's. We pick these out
of the cached reference to the madt_gicc structure
similarly to the core PMU code. We then create a platform
device referring to the IRQ and let the user/module loader
decide whether to load the SPE driver.

Tested-by: Hanjun Guo <hanjun.guo@linaro.org>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Reviewed-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/acpi.h |  3 ++
 drivers/perf/arm_pmu_acpi.c   | 72 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/perf/arm_pmu.h  |  2 ++
 3 files changed, 77 insertions(+)

(limited to 'drivers')

diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index 7628efbe6c12..d10399b9f998 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -41,6 +41,9 @@
 	(!(entry) || (entry)->header.length < ACPI_MADT_GICC_MIN_LENGTH || \
 	(unsigned long)(entry) + (entry)->header.length > (end))
 
+#define ACPI_MADT_GICC_SPE  (ACPI_OFFSET(struct acpi_madt_generic_interrupt, \
+	spe_interrupt) + sizeof(u16))
+
 /* Basic configuration for ACPI */
 #ifdef	CONFIG_ACPI
 pgprot_t __acpi_get_mem_attribute(phys_addr_t addr);
diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c
index 0f197516d708..864d7ebe45e9 100644
--- a/drivers/perf/arm_pmu_acpi.c
+++ b/drivers/perf/arm_pmu_acpi.c
@@ -74,6 +74,76 @@ static void arm_pmu_acpi_unregister_irq(int cpu)
 	acpi_unregister_gsi(gsi);
 }
 
+#if IS_ENABLED(CONFIG_ARM_SPE_PMU)
+static struct resource spe_resources[] = {
+	{
+		/* irq */
+		.flags          = IORESOURCE_IRQ,
+	}
+};
+
+static struct platform_device spe_dev = {
+	.name = ARMV8_SPE_PDEV_NAME,
+	.id = -1,
+	.resource = spe_resources,
+	.num_resources = ARRAY_SIZE(spe_resources)
+};
+
+/*
+ * For lack of a better place, hook the normal PMU MADT walk
+ * and create a SPE device if we detect a recent MADT with
+ * a homogeneous PPI mapping.
+ */
+static void arm_spe_acpi_register_device(void)
+{
+	int cpu, hetid, irq, ret;
+	bool first = true;
+	u16 gsi = 0;
+
+	/*
+	 * Sanity check all the GICC tables for the same interrupt number.
+	 * For now, we only support homogeneous ACPI/SPE machines.
+	 */
+	for_each_possible_cpu(cpu) {
+		struct acpi_madt_generic_interrupt *gicc;
+
+		gicc = acpi_cpu_get_madt_gicc(cpu);
+		if (gicc->header.length < ACPI_MADT_GICC_SPE)
+			return;
+
+		if (first) {
+			gsi = gicc->spe_interrupt;
+			if (!gsi)
+				return;
+			hetid = find_acpi_cpu_topology_hetero_id(cpu);
+			first = false;
+		} else if ((gsi != gicc->spe_interrupt) ||
+			   (hetid != find_acpi_cpu_topology_hetero_id(cpu))) {
+			pr_warn("ACPI: SPE must be homogeneous\n");
+			return;
+		}
+	}
+
+	irq = acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE,
+				ACPI_ACTIVE_HIGH);
+	if (irq < 0) {
+		pr_warn("ACPI: SPE Unable to register interrupt: %d\n", gsi);
+		return;
+	}
+
+	spe_resources[0].start = irq;
+	ret = platform_device_register(&spe_dev);
+	if (ret < 0) {
+		pr_warn("ACPI: SPE: Unable to register device\n");
+		acpi_unregister_gsi(gsi);
+	}
+}
+#else
+static inline void arm_spe_acpi_register_device(void)
+{
+}
+#endif /* CONFIG_ARM_SPE_PMU */
+
 static int arm_pmu_acpi_parse_irqs(void)
 {
 	int irq, cpu, irq_cpu, err;
@@ -279,6 +349,8 @@ static int arm_pmu_acpi_init(void)
 	if (acpi_disabled)
 		return 0;
 
+	arm_spe_acpi_register_device();
+
 	ret = arm_pmu_acpi_parse_irqs();
 	if (ret)
 		return ret;
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 4641e850b204..784bc58f165a 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -175,4 +175,6 @@ void armpmu_free_irq(int irq, int cpu);
 
 #endif /* CONFIG_ARM_PMU */
 
+#define ARMV8_SPE_PDEV_NAME "arm,spe-v1"
+
 #endif /* __ARM_PMU_H__ */
-- 
cgit v1.2.3


From d482e575fbf0f7ec9319bded951f21bbc84312bf Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com>
Date: Wed, 26 Jun 2019 16:37:18 -0500
Subject: perf: arm_spe: Enable ACPI/Platform automatic module loading

Lets add the MODULE_TABLE and platform id_table entries so that
the SPE driver can attach to the ACPI platform device created by
the core pmu code.

Tested-by: Hanjun Guo <hanjun.guo@linaro.org>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_spe_pmu.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index e120f933412a..4fb65c61c8ea 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -38,6 +38,7 @@
 #include <linux/of_address.h>
 #include <linux/of_device.h>
 #include <linux/perf_event.h>
+#include <linux/perf/arm_pmu.h>
 #include <linux/platform_device.h>
 #include <linux/printk.h>
 #include <linux/slab.h>
@@ -1168,7 +1169,13 @@ static const struct of_device_id arm_spe_pmu_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, arm_spe_pmu_of_match);
 
-static int arm_spe_pmu_device_dt_probe(struct platform_device *pdev)
+static const struct platform_device_id arm_spe_match[] = {
+	{ ARMV8_SPE_PDEV_NAME, 0},
+	{ }
+};
+MODULE_DEVICE_TABLE(platform, arm_spe_match);
+
+static int arm_spe_pmu_device_probe(struct platform_device *pdev)
 {
 	int ret;
 	struct arm_spe_pmu *spe_pmu;
@@ -1228,11 +1235,12 @@ static int arm_spe_pmu_device_remove(struct platform_device *pdev)
 }
 
 static struct platform_driver arm_spe_pmu_driver = {
+	.id_table = arm_spe_match,
 	.driver	= {
 		.name		= DRVNAME,
 		.of_match_table	= of_match_ptr(arm_spe_pmu_of_match),
 	},
-	.probe	= arm_spe_pmu_device_dt_probe,
+	.probe	= arm_spe_pmu_device_probe,
 	.remove	= arm_spe_pmu_device_remove,
 };
 
-- 
cgit v1.2.3