From 9a66d36cc7ace8062bd703d1edfb99437a2ddf2b Mon Sep 17 00:00:00 2001 From: Frank Li Date: Wed, 1 May 2019 18:43:29 +0000 Subject: drivers/perf: imx_ddr: Add DDR performance counter support to perf Add DDR performance monitor support for iMX8QXP. The PMU consists of 3 programmable event counters and a single dedicated cycle counter. Example usage: $ perf stat -a -e \ imx8_ddr0/read-cycles/,imx8_ddr0/write-cycles/,imx8_ddr0/precharge/ ls - or - $ perf stat -a -e \ imx8_ddr0/cycles/,imx8_ddr0/read-accesses/,imx8_ddr0/write-accesses/ ls Other events are supported, and advertised via perf list. Reviewed-by: Andrey Smirnov Signed-off-by: Frank Li [will: rewrote commit message/kconfig and used #defines for dev/cpuhp names] Signed-off-by: Will Deacon --- drivers/perf/Kconfig | 8 + drivers/perf/Makefile | 1 + drivers/perf/fsl_imx8_ddr_perf.c | 554 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 563 insertions(+) create mode 100644 drivers/perf/fsl_imx8_ddr_perf.c (limited to 'drivers') diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index e4221a107dca..09ae8a970880 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -71,6 +71,14 @@ config ARM_DSU_PMU system, control logic. The PMU allows counting various events related to DSU. +config FSL_IMX8_DDR_PMU + tristate "Freescale i.MX8 DDR perf monitor" + depends on ARCH_MXC + help + Provides support for the DDR performance monitor in i.MX8, which + can give information about memory throughput and other related + events. 
+ config HISI_PMU bool "HiSilicon SoC PMU" depends on ARM64 && ACPI diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index 30489941f3d6..2ebb4de17815 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o obj-$(CONFIG_ARM_SMMU_V3_PMU) += arm_smmuv3_pmu.o +obj-$(CONFIG_FSL_IMX8_DDR_PMU) += fsl_imx8_ddr_perf.o obj-$(CONFIG_HISI_PMU) += hisilicon/ obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c new file mode 100644 index 000000000000..63fe21600072 --- /dev/null +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -0,0 +1,554 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2017 NXP + * Copyright 2016 Freescale Semiconductor, Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define COUNTER_CNTL 0x0 +#define COUNTER_READ 0x20 + +#define COUNTER_DPCR1 0x30 + +#define CNTL_OVER 0x1 +#define CNTL_CLEAR 0x2 +#define CNTL_EN 0x4 +#define CNTL_EN_MASK 0xFFFFFFFB +#define CNTL_CLEAR_MASK 0xFFFFFFFD +#define CNTL_OVER_MASK 0xFFFFFFFE + +#define CNTL_CSV_SHIFT 24 +#define CNTL_CSV_MASK (0xFF << CNTL_CSV_SHIFT) + +#define EVENT_CYCLES_ID 0 +#define EVENT_CYCLES_COUNTER 0 +#define NUM_COUNTERS 4 + +#define to_ddr_pmu(p) container_of(p, struct ddr_pmu, pmu) + +#define DDR_PERF_DEV_NAME "imx8_ddr" +#define DDR_CPUHP_CB_NAME DDR_PERF_DEV_NAME "_perf_pmu" + +static DEFINE_IDA(ddr_ida); + +static const struct of_device_id imx_ddr_pmu_dt_ids[] = { + { .compatible = "fsl,imx8-ddr-pmu",}, + { .compatible = "fsl,imx8m-ddr-pmu",}, + { /* sentinel */ } +}; + +struct ddr_pmu { + struct pmu pmu; + void __iomem *base; + unsigned int cpu; + struct hlist_node node; + struct device *dev; + struct perf_event *events[NUM_COUNTERS]; + int 
active_events; + enum cpuhp_state cpuhp_state; + int irq; + int id; +}; + +static ssize_t ddr_perf_cpumask_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ddr_pmu *pmu = dev_get_drvdata(dev); + + return cpumap_print_to_pagebuf(true, buf, cpumask_of(pmu->cpu)); +} + +static struct device_attribute ddr_perf_cpumask_attr = + __ATTR(cpumask, 0444, ddr_perf_cpumask_show, NULL); + +static struct attribute *ddr_perf_cpumask_attrs[] = { + &ddr_perf_cpumask_attr.attr, + NULL, +}; + +static struct attribute_group ddr_perf_cpumask_attr_group = { + .attrs = ddr_perf_cpumask_attrs, +}; + +static ssize_t +ddr_pmu_event_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + return sprintf(page, "event=0x%02llx\n", pmu_attr->id); +} + +#define IMX8_DDR_PMU_EVENT_ATTR(_name, _id) \ + (&((struct perf_pmu_events_attr[]) { \ + { .attr = __ATTR(_name, 0444, ddr_pmu_event_show, NULL),\ + .id = _id, } \ + })[0].attr.attr) + +static struct attribute *ddr_perf_events_attrs[] = { + IMX8_DDR_PMU_EVENT_ATTR(cycles, EVENT_CYCLES_ID), + IMX8_DDR_PMU_EVENT_ATTR(selfresh, 0x01), + IMX8_DDR_PMU_EVENT_ATTR(read-accesses, 0x04), + IMX8_DDR_PMU_EVENT_ATTR(write-accesses, 0x05), + IMX8_DDR_PMU_EVENT_ATTR(read-queue-depth, 0x08), + IMX8_DDR_PMU_EVENT_ATTR(write-queue-depth, 0x09), + IMX8_DDR_PMU_EVENT_ATTR(lp-read-credit-cnt, 0x10), + IMX8_DDR_PMU_EVENT_ATTR(hp-read-credit-cnt, 0x11), + IMX8_DDR_PMU_EVENT_ATTR(write-credit-cnt, 0x12), + IMX8_DDR_PMU_EVENT_ATTR(read-command, 0x20), + IMX8_DDR_PMU_EVENT_ATTR(write-command, 0x21), + IMX8_DDR_PMU_EVENT_ATTR(read-modify-write-command, 0x22), + IMX8_DDR_PMU_EVENT_ATTR(hp-read, 0x23), + IMX8_DDR_PMU_EVENT_ATTR(hp-req-nocredit, 0x24), + IMX8_DDR_PMU_EVENT_ATTR(hp-xact-credit, 0x25), + IMX8_DDR_PMU_EVENT_ATTR(lp-req-nocredit, 0x26), + IMX8_DDR_PMU_EVENT_ATTR(lp-xact-credit, 0x27), + 
IMX8_DDR_PMU_EVENT_ATTR(wr-xact-credit, 0x29), + IMX8_DDR_PMU_EVENT_ATTR(read-cycles, 0x2a), + IMX8_DDR_PMU_EVENT_ATTR(write-cycles, 0x2b), + IMX8_DDR_PMU_EVENT_ATTR(read-write-transition, 0x30), + IMX8_DDR_PMU_EVENT_ATTR(precharge, 0x31), + IMX8_DDR_PMU_EVENT_ATTR(activate, 0x32), + IMX8_DDR_PMU_EVENT_ATTR(load-mode, 0x33), + IMX8_DDR_PMU_EVENT_ATTR(perf-mwr, 0x34), + IMX8_DDR_PMU_EVENT_ATTR(read, 0x35), + IMX8_DDR_PMU_EVENT_ATTR(read-activate, 0x36), + IMX8_DDR_PMU_EVENT_ATTR(refresh, 0x37), + IMX8_DDR_PMU_EVENT_ATTR(write, 0x38), + IMX8_DDR_PMU_EVENT_ATTR(raw-hazard, 0x39), + NULL, +}; + +static struct attribute_group ddr_perf_events_attr_group = { + .name = "events", + .attrs = ddr_perf_events_attrs, +}; + +PMU_FORMAT_ATTR(event, "config:0-7"); + +static struct attribute *ddr_perf_format_attrs[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group ddr_perf_format_attr_group = { + .name = "format", + .attrs = ddr_perf_format_attrs, +}; + +static const struct attribute_group *attr_groups[] = { + &ddr_perf_events_attr_group, + &ddr_perf_format_attr_group, + &ddr_perf_cpumask_attr_group, + NULL, +}; + +static u32 ddr_perf_alloc_counter(struct ddr_pmu *pmu, int event) +{ + int i; + + /* + * Always map cycle event to counter 0 + * Cycles counter is dedicated for cycle event + * can't used for the other events + */ + if (event == EVENT_CYCLES_ID) { + if (pmu->events[EVENT_CYCLES_COUNTER] == NULL) + return EVENT_CYCLES_COUNTER; + else + return -ENOENT; + } + + for (i = 1; i < NUM_COUNTERS; i++) { + if (pmu->events[i] == NULL) + return i; + } + + return -ENOENT; +} + +static void ddr_perf_free_counter(struct ddr_pmu *pmu, int counter) +{ + pmu->events[counter] = NULL; +} + +static u32 ddr_perf_read_counter(struct ddr_pmu *pmu, int counter) +{ + return readl_relaxed(pmu->base + COUNTER_READ + counter * 4); +} + +static int ddr_perf_event_init(struct perf_event *event) +{ + struct ddr_pmu *pmu = to_ddr_pmu(event->pmu); + struct hw_perf_event *hwc = 
&event->hw; + struct perf_event *sibling; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) + return -EOPNOTSUPP; + + if (event->cpu < 0) { + dev_warn(pmu->dev, "Can't provide per-task data!\n"); + return -EOPNOTSUPP; + } + + /* + * We must NOT create groups containing mixed PMUs, although software + * events are acceptable (for example to create a CCN group + * periodically read when a hrtimer aka cpu-clock leader triggers). + */ + if (event->group_leader->pmu != event->pmu && + !is_software_event(event->group_leader)) + return -EINVAL; + + for_each_sibling_event(sibling, event->group_leader) { + if (sibling->pmu != event->pmu && + !is_software_event(sibling)) + return -EINVAL; + } + + event->cpu = pmu->cpu; + hwc->idx = -1; + + return 0; +} + + +static void ddr_perf_event_update(struct perf_event *event) +{ + struct ddr_pmu *pmu = to_ddr_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + u64 delta, prev_raw_count, new_raw_count; + int counter = hwc->idx; + + do { + prev_raw_count = local64_read(&hwc->prev_count); + new_raw_count = ddr_perf_read_counter(pmu, counter); + } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count); + + delta = (new_raw_count - prev_raw_count) & 0xFFFFFFFF; + + local64_add(delta, &event->count); +} + +static void ddr_perf_counter_enable(struct ddr_pmu *pmu, int config, + int counter, bool enable) +{ + u8 reg = counter * 4 + COUNTER_CNTL; + int val; + + if (enable) { + /* + * must disable first, then enable again + * otherwise, cycle counter will not work + * if previous state is enabled. 
+ */ + writel(0, pmu->base + reg); + val = CNTL_EN | CNTL_CLEAR; + val |= FIELD_PREP(CNTL_CSV_MASK, config); + writel(val, pmu->base + reg); + } else { + /* Disable counter */ + writel(0, pmu->base + reg); + } +} + +static void ddr_perf_event_start(struct perf_event *event, int flags) +{ + struct ddr_pmu *pmu = to_ddr_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + + local64_set(&hwc->prev_count, 0); + + ddr_perf_counter_enable(pmu, event->attr.config, counter, true); + + hwc->state = 0; +} + +static int ddr_perf_event_add(struct perf_event *event, int flags) +{ + struct ddr_pmu *pmu = to_ddr_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int counter; + int cfg = event->attr.config; + + counter = ddr_perf_alloc_counter(pmu, cfg); + if (counter < 0) { + dev_dbg(pmu->dev, "There are not enough counters\n"); + return -EOPNOTSUPP; + } + + pmu->events[counter] = event; + pmu->active_events++; + hwc->idx = counter; + + hwc->state |= PERF_HES_STOPPED; + + if (flags & PERF_EF_START) + ddr_perf_event_start(event, flags); + + return 0; +} + +static void ddr_perf_event_stop(struct perf_event *event, int flags) +{ + struct ddr_pmu *pmu = to_ddr_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + + ddr_perf_counter_enable(pmu, event->attr.config, counter, false); + ddr_perf_event_update(event); + + hwc->state |= PERF_HES_STOPPED; +} + +static void ddr_perf_event_del(struct perf_event *event, int flags) +{ + struct ddr_pmu *pmu = to_ddr_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + + ddr_perf_event_stop(event, PERF_EF_UPDATE); + + ddr_perf_free_counter(pmu, counter); + pmu->active_events--; + hwc->idx = -1; +} + +static void ddr_perf_pmu_enable(struct pmu *pmu) +{ + struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu); + + /* enable cycle counter if cycle is not active event list */ + if (ddr_pmu->events[EVENT_CYCLES_COUNTER] == NULL) + 
ddr_perf_counter_enable(ddr_pmu, + EVENT_CYCLES_ID, + EVENT_CYCLES_COUNTER, + true); +} + +static void ddr_perf_pmu_disable(struct pmu *pmu) +{ + struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu); + + if (ddr_pmu->events[EVENT_CYCLES_COUNTER] == NULL) + ddr_perf_counter_enable(ddr_pmu, + EVENT_CYCLES_ID, + EVENT_CYCLES_COUNTER, + false); +} + +static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base, + struct device *dev) +{ + *pmu = (struct ddr_pmu) { + .pmu = (struct pmu) { + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .task_ctx_nr = perf_invalid_context, + .attr_groups = attr_groups, + .event_init = ddr_perf_event_init, + .add = ddr_perf_event_add, + .del = ddr_perf_event_del, + .start = ddr_perf_event_start, + .stop = ddr_perf_event_stop, + .read = ddr_perf_event_update, + .pmu_enable = ddr_perf_pmu_enable, + .pmu_disable = ddr_perf_pmu_disable, + }, + .base = base, + .dev = dev, + }; + + pmu->id = ida_simple_get(&ddr_ida, 0, 0, GFP_KERNEL); + return pmu->id; +} + +static irqreturn_t ddr_perf_irq_handler(int irq, void *p) +{ + int i; + struct ddr_pmu *pmu = (struct ddr_pmu *) p; + struct perf_event *event, *cycle_event = NULL; + + /* all counter will stop if cycle counter disabled */ + ddr_perf_counter_enable(pmu, + EVENT_CYCLES_ID, + EVENT_CYCLES_COUNTER, + false); + /* + * When the cycle counter overflows, all counters are stopped, + * and an IRQ is raised. If any other counter overflows, it + * continues counting, and no IRQ is raised. + * + * Cycles occur at least 4 times as often as other events, so we + * can update all events on a cycle counter overflow and not + * lose events. 
+ * + */ + for (i = 0; i < NUM_COUNTERS; i++) { + + if (!pmu->events[i]) + continue; + + event = pmu->events[i]; + + ddr_perf_event_update(event); + + if (event->hw.idx == EVENT_CYCLES_COUNTER) + cycle_event = event; + } + + ddr_perf_counter_enable(pmu, + EVENT_CYCLES_ID, + EVENT_CYCLES_COUNTER, + true); + if (cycle_event) + ddr_perf_event_update(cycle_event); + + return IRQ_HANDLED; +} + +static int ddr_perf_offline_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct ddr_pmu *pmu = hlist_entry_safe(node, struct ddr_pmu, node); + int target; + + if (cpu != pmu->cpu) + return 0; + + target = cpumask_any_but(cpu_online_mask, cpu); + if (target >= nr_cpu_ids) + return 0; + + perf_pmu_migrate_context(&pmu->pmu, cpu, target); + pmu->cpu = target; + + WARN_ON(irq_set_affinity_hint(pmu->irq, cpumask_of(pmu->cpu))); + + return 0; +} + +static int ddr_perf_probe(struct platform_device *pdev) +{ + struct ddr_pmu *pmu; + struct device_node *np; + void __iomem *base; + char *name; + int num; + int ret; + int irq; + + base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(base)) + return PTR_ERR(base); + + np = pdev->dev.of_node; + + pmu = devm_kzalloc(&pdev->dev, sizeof(*pmu), GFP_KERNEL); + if (!pmu) + return -ENOMEM; + + num = ddr_perf_init(pmu, base, &pdev->dev); + + platform_set_drvdata(pdev, pmu); + + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME "%d", + num); + if (!name) + return -ENOMEM; + + pmu->cpu = raw_smp_processor_id(); + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + DDR_CPUHP_CB_NAME, + NULL, + ddr_perf_offline_cpu); + + if (ret < 0) { + dev_err(&pdev->dev, "cpuhp_setup_state_multi failed\n"); + goto ddr_perf_err; + } + + pmu->cpuhp_state = ret; + + /* Register the pmu instance for cpu hotplug */ + cpuhp_state_add_instance_nocalls(pmu->cpuhp_state, &pmu->node); + + /* Request irq */ + irq = of_irq_get(np, 0); + if (irq < 0) { + dev_err(&pdev->dev, "Failed to get irq: %d", irq); + ret = irq; + goto ddr_perf_err; + } + + ret = 
devm_request_irq(&pdev->dev, irq, + ddr_perf_irq_handler, + IRQF_NOBALANCING | IRQF_NO_THREAD, + DDR_CPUHP_CB_NAME, + pmu); + if (ret < 0) { + dev_err(&pdev->dev, "Request irq failed: %d", ret); + goto ddr_perf_err; + } + + pmu->irq = irq; + ret = irq_set_affinity_hint(pmu->irq, cpumask_of(pmu->cpu)); + if (ret) { + dev_err(pmu->dev, "Failed to set interrupt affinity!\n"); + goto ddr_perf_err; + } + + ret = perf_pmu_register(&pmu->pmu, name, -1); + if (ret) + goto ddr_perf_err; + + return 0; + +ddr_perf_err: + if (pmu->cpuhp_state) + cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node); + + ida_simple_remove(&ddr_ida, pmu->id); + dev_warn(&pdev->dev, "i.MX8 DDR Perf PMU failed (%d), disabled\n", ret); + return ret; +} + +static int ddr_perf_remove(struct platform_device *pdev) +{ + struct ddr_pmu *pmu = platform_get_drvdata(pdev); + + cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node); + irq_set_affinity_hint(pmu->irq, NULL); + + perf_pmu_unregister(&pmu->pmu); + + ida_simple_remove(&ddr_ida, pmu->id); + return 0; +} + +static struct platform_driver imx_ddr_pmu_driver = { + .driver = { + .name = "imx-ddr-pmu", + .of_match_table = imx_ddr_pmu_dt_ids, + }, + .probe = ddr_perf_probe, + .remove = ddr_perf_remove, +}; + +module_platform_driver(imx_ddr_pmu_driver); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From ed2b664fcc8073c09394393756df3fc86977bbac Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 26 Jun 2019 16:37:15 -0500 Subject: ACPI/PPTT: Modify node flag detection to find last IDENTICAL The ACPI specification implies that the IDENTICAL flag should be set on all non leaf nodes where the children are identical. This means that we need to be searching for the last node with the identical flag set rather than the first one. Since this flag is also dependent on the table revision, we need to add a bit of extra code to verify the table revision, and the next node's state in the traversal. 
Since we want to avoid function pointers here, let's just special-case the IDENTICAL flag. Acked-by: Rafael J. Wysocki Tested-by: Hanjun Guo Reviewed-by: Sudeep Holla Signed-off-by: Jeremy Linton Signed-off-by: Will Deacon --- drivers/acpi/pptt.c | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c index b72e6afaa8fb..05344413f199 100644 --- a/drivers/acpi/pptt.c +++ b/drivers/acpi/pptt.c @@ -432,17 +432,40 @@ static void cache_setup_acpi_cpu(struct acpi_table_header *table, } } +static bool flag_identical(struct acpi_table_header *table_hdr, + struct acpi_pptt_processor *cpu) +{ + struct acpi_pptt_processor *next; + + /* heterogeneous machines must use PPTT revision > 1 */ + if (table_hdr->revision < 2) + return false; + + /* Locate the last node in the tree with IDENTICAL set */ + if (cpu->flags & ACPI_PPTT_ACPI_IDENTICAL) { + next = fetch_pptt_node(table_hdr, cpu->parent); + if (!(next && next->flags & ACPI_PPTT_ACPI_IDENTICAL)) + return true; + } + + return false; +} + /* Passing level values greater than this will result in search termination */ #define PPTT_ABORT_PACKAGE 0xFF -static struct acpi_pptt_processor *acpi_find_processor_package_id(struct acpi_table_header *table_hdr, - struct acpi_pptt_processor *cpu, - int level, int flag) +static struct acpi_pptt_processor *acpi_find_processor_tag(struct acpi_table_header *table_hdr, + struct acpi_pptt_processor *cpu, + int level, int flag) { struct acpi_pptt_processor *prev_node; while (cpu && level) { - if (cpu->flags & flag) + /* special case the identical flag to find last identical */ + if (flag == ACPI_PPTT_ACPI_IDENTICAL) { + if (flag_identical(table_hdr, cpu)) + break; + } else if (cpu->flags & flag) break; pr_debug("level %d\n", level); prev_node = fetch_pptt_node(table_hdr, cpu->parent); @@ -480,8 +503,8 @@ static int topology_get_acpi_cpu_tag(struct acpi_table_header *table, cpu_node = 
acpi_find_processor_node(table, acpi_cpu_id); if (cpu_node) { - cpu_node = acpi_find_processor_package_id(table, cpu_node, - level, flag); + cpu_node = acpi_find_processor_tag(table, cpu_node, + level, flag); /* * As per specification if the processor structure represents * an actual processor, then ACPI processor ID must be valid. -- cgit v1.2.3 From 56855a99f3d0d1e9f1f4e24f5851f9bf14c83296 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 26 Jun 2019 16:37:16 -0500 Subject: ACPI/PPTT: Add function to return ACPI 6.3 Identical tokens ACPI 6.3 adds a flag to indicate that child nodes are all identical cores. This is useful to authoritatively determine if a set of (possibly offline) cores are identical or not. Since the flag doesn't give us a unique id we can generate one and use it to create bitmaps of sibling nodes, or simply in a loop to determine if a subset of cores are identical. Acked-by: Rafael J. Wysocki Tested-by: Hanjun Guo Reviewed-by: Sudeep Holla Signed-off-by: Jeremy Linton Signed-off-by: Will Deacon --- drivers/acpi/pptt.c | 26 ++++++++++++++++++++++++++ include/linux/acpi.h | 5 +++++ 2 files changed, 31 insertions(+) (limited to 'drivers') diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c index 05344413f199..1e7ac0bd0d3a 100644 --- a/drivers/acpi/pptt.c +++ b/drivers/acpi/pptt.c @@ -683,3 +683,29 @@ int find_acpi_cpu_topology_package(unsigned int cpu) return find_acpi_cpu_topology_tag(cpu, PPTT_ABORT_PACKAGE, ACPI_PPTT_PHYSICAL_PACKAGE); } + +/** + * find_acpi_cpu_topology_hetero_id() - Get a core architecture tag + * @cpu: Kernel logical CPU number + * + * Determine a unique heterogeneous tag for the given CPU. CPUs with the same + * implementation should have matching tags. + * + * The returned tag can be used to group peers with identical implementation. + * + * The search terminates when a level is found with the identical implementation + * flag set or we reach a root node. 
+ * + * Due to limitations in the PPTT data structure, there may be rare situations + * where two cores in a heterogeneous machine may be identical, but won't have + * the same tag. + * + * Return: -ENOENT if the PPTT doesn't exist, or the CPU cannot be found. + * Otherwise returns a value which represents a group of identical cores + * similar to this CPU. + */ +int find_acpi_cpu_topology_hetero_id(unsigned int cpu) +{ + return find_acpi_cpu_topology_tag(cpu, PPTT_ABORT_PACKAGE, + ACPI_PPTT_ACPI_IDENTICAL); +} diff --git a/include/linux/acpi.h b/include/linux/acpi.h index d315d86844e4..5bcd23e5ccd6 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1303,6 +1303,7 @@ static inline int lpit_read_residency_count_address(u64 *address) #ifdef CONFIG_ACPI_PPTT int find_acpi_cpu_topology(unsigned int cpu, int level); int find_acpi_cpu_topology_package(unsigned int cpu); +int find_acpi_cpu_topology_hetero_id(unsigned int cpu); int find_acpi_cpu_cache_topology(unsigned int cpu, int level); #else static inline int find_acpi_cpu_topology(unsigned int cpu, int level) @@ -1313,6 +1314,10 @@ static inline int find_acpi_cpu_topology_package(unsigned int cpu) { return -EINVAL; } +static inline int find_acpi_cpu_topology_hetero_id(unsigned int cpu) +{ + return -EINVAL; +} static inline int find_acpi_cpu_cache_topology(unsigned int cpu, int level) { return -EINVAL; -- cgit v1.2.3 From d24a0c7099b32b6981d7f126c45348e381718350 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 26 Jun 2019 16:37:17 -0500 Subject: arm_pmu: acpi: spe: Add initial MADT/SPE probing ACPI 6.3 adds additional fields to the MADT GICC structure to describe SPE PPI's. We pick these out of the cached reference to the madt_gicc structure similarly to the core PMU code. We then create a platform device referring to the IRQ and let the user/module loader decide whether to load the SPE driver. 
Tested-by: Hanjun Guo Reviewed-by: Sudeep Holla Reviewed-by: Lorenzo Pieralisi Signed-off-by: Jeremy Linton Signed-off-by: Will Deacon --- arch/arm64/include/asm/acpi.h | 3 ++ drivers/perf/arm_pmu_acpi.c | 72 +++++++++++++++++++++++++++++++++++++++++++ include/linux/perf/arm_pmu.h | 2 ++ 3 files changed, 77 insertions(+) (limited to 'drivers') diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 7628efbe6c12..d10399b9f998 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -41,6 +41,9 @@ (!(entry) || (entry)->header.length < ACPI_MADT_GICC_MIN_LENGTH || \ (unsigned long)(entry) + (entry)->header.length > (end)) +#define ACPI_MADT_GICC_SPE (ACPI_OFFSET(struct acpi_madt_generic_interrupt, \ + spe_interrupt) + sizeof(u16)) + /* Basic configuration for ACPI */ #ifdef CONFIG_ACPI pgprot_t __acpi_get_mem_attribute(phys_addr_t addr); diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c index 0f197516d708..864d7ebe45e9 100644 --- a/drivers/perf/arm_pmu_acpi.c +++ b/drivers/perf/arm_pmu_acpi.c @@ -74,6 +74,76 @@ static void arm_pmu_acpi_unregister_irq(int cpu) acpi_unregister_gsi(gsi); } +#if IS_ENABLED(CONFIG_ARM_SPE_PMU) +static struct resource spe_resources[] = { + { + /* irq */ + .flags = IORESOURCE_IRQ, + } +}; + +static struct platform_device spe_dev = { + .name = ARMV8_SPE_PDEV_NAME, + .id = -1, + .resource = spe_resources, + .num_resources = ARRAY_SIZE(spe_resources) +}; + +/* + * For lack of a better place, hook the normal PMU MADT walk + * and create a SPE device if we detect a recent MADT with + * a homogeneous PPI mapping. + */ +static void arm_spe_acpi_register_device(void) +{ + int cpu, hetid, irq, ret; + bool first = true; + u16 gsi = 0; + + /* + * Sanity check all the GICC tables for the same interrupt number. + * For now, we only support homogeneous ACPI/SPE machines. 
+ */ + for_each_possible_cpu(cpu) { + struct acpi_madt_generic_interrupt *gicc; + + gicc = acpi_cpu_get_madt_gicc(cpu); + if (gicc->header.length < ACPI_MADT_GICC_SPE) + return; + + if (first) { + gsi = gicc->spe_interrupt; + if (!gsi) + return; + hetid = find_acpi_cpu_topology_hetero_id(cpu); + first = false; + } else if ((gsi != gicc->spe_interrupt) || + (hetid != find_acpi_cpu_topology_hetero_id(cpu))) { + pr_warn("ACPI: SPE must be homogeneous\n"); + return; + } + } + + irq = acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE, + ACPI_ACTIVE_HIGH); + if (irq < 0) { + pr_warn("ACPI: SPE Unable to register interrupt: %d\n", gsi); + return; + } + + spe_resources[0].start = irq; + ret = platform_device_register(&spe_dev); + if (ret < 0) { + pr_warn("ACPI: SPE: Unable to register device\n"); + acpi_unregister_gsi(gsi); + } +} +#else +static inline void arm_spe_acpi_register_device(void) +{ +} +#endif /* CONFIG_ARM_SPE_PMU */ + static int arm_pmu_acpi_parse_irqs(void) { int irq, cpu, irq_cpu, err; @@ -279,6 +349,8 @@ static int arm_pmu_acpi_init(void) if (acpi_disabled) return 0; + arm_spe_acpi_register_device(); + ret = arm_pmu_acpi_parse_irqs(); if (ret) return ret; diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 4641e850b204..784bc58f165a 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -175,4 +175,6 @@ void armpmu_free_irq(int irq, int cpu); #endif /* CONFIG_ARM_PMU */ +#define ARMV8_SPE_PDEV_NAME "arm,spe-v1" + #endif /* __ARM_PMU_H__ */ -- cgit v1.2.3 From d482e575fbf0f7ec9319bded951f21bbc84312bf Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 26 Jun 2019 16:37:18 -0500 Subject: perf: arm_spe: Enable ACPI/Platform automatic module loading Let's add the MODULE_DEVICE_TABLE and platform id_table entries so that the SPE driver can attach to the ACPI platform device created by the core pmu code. 
Tested-by: Hanjun Guo Reviewed-by: Sudeep Holla Signed-off-by: Jeremy Linton Signed-off-by: Will Deacon --- drivers/perf/arm_spe_pmu.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index e120f933412a..4fb65c61c8ea 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -1168,7 +1169,13 @@ static const struct of_device_id arm_spe_pmu_of_match[] = { }; MODULE_DEVICE_TABLE(of, arm_spe_pmu_of_match); -static int arm_spe_pmu_device_dt_probe(struct platform_device *pdev) +static const struct platform_device_id arm_spe_match[] = { + { ARMV8_SPE_PDEV_NAME, 0}, + { } +}; +MODULE_DEVICE_TABLE(platform, arm_spe_match); + +static int arm_spe_pmu_device_probe(struct platform_device *pdev) { int ret; struct arm_spe_pmu *spe_pmu; @@ -1228,11 +1235,12 @@ static int arm_spe_pmu_device_remove(struct platform_device *pdev) } static struct platform_driver arm_spe_pmu_driver = { + .id_table = arm_spe_match, .driver = { .name = DRVNAME, .of_match_table = of_match_ptr(arm_spe_pmu_of_match), }, - .probe = arm_spe_pmu_device_dt_probe, + .probe = arm_spe_pmu_device_probe, .remove = arm_spe_pmu_device_remove, }; -- cgit v1.2.3