summaryrefslogtreecommitdiffstats
path: root/tools/perf/arch
diff options
context:
space:
mode:
authorKim Phillips <kim.phillips@arm.com>2018-01-14 13:28:50 -0600
committerArnaldo Carvalho de Melo <acme@redhat.com>2018-01-17 10:23:31 -0300
commitffd3d18c20b8df281a18940ee80a99b28114d4b7 (patch)
tree34dc21f7af0fe43bbe16a04aeff46c59fb668593 /tools/perf/arch
parentd777f8de99b05d399c0e4e51cdce016f26bd971b (diff)
downloadlinux-ffd3d18c20b8df281a18940ee80a99b28114d4b7.tar.bz2
perf tools: Add ARM Statistical Profiling Extensions (SPE) support
'perf record' and 'perf report --dump-raw-trace' supported in this release. Example usage: # perf record -e arm_spe/ts_enable=1,pa_enable=1/ dd if=/dev/zero of=/dev/null count=10000 # perf report --dump-raw-trace Note that the perf.data file is portable, so the report can be run on another architecture host if necessary. Output will contain raw SPE data and its textual representation, such as: 0x5c8 [0x30]: PERF_RECORD_AUXTRACE size: 0x200000 offset: 0 ref: 0x1891ad0e idx: 1 tid: 2227 cpu: 1 . . ... ARM SPE data: size 2097152 bytes . 00000000: 49 00 LD . 00000002: b2 c0 3b 29 0f 00 00 ff ff VA 0xffff00000f293bc0 . 0000000b: b3 c0 eb 24 fb 00 00 00 80 PA 0xfb24ebc0 ns=1 . 00000014: 9a 00 00 LAT 0 XLAT . 00000017: 42 16 EV RETIRED L1D-ACCESS TLB-ACCESS . 00000019: b0 00 c4 15 08 00 00 ff ff PC 0xff00000815c400 el3 ns=1 . 00000022: 98 00 00 LAT 0 TOT . 00000025: 71 36 6c 21 2c 09 00 00 00 TS 39395093558 . 0000002e: 49 00 LD . 00000030: b2 80 3c 29 0f 00 00 ff ff VA 0xffff00000f293c80 . 00000039: b3 80 ec 24 fb 00 00 00 80 PA 0xfb24ec80 ns=1 . 00000042: 9a 00 00 LAT 0 XLAT . 00000045: 42 16 EV RETIRED L1D-ACCESS TLB-ACCESS . 00000047: b0 f4 11 16 08 00 00 ff ff PC 0xff0000081611f4 el3 ns=1 . 00000050: 98 00 00 LAT 0 TOT . 00000053: 71 36 6c 21 2c 09 00 00 00 TS 39395093558 . 0000005c: 48 00 INSN-OTHER . 0000005e: 42 02 EV RETIRED . 00000060: b0 2c ef 7f 08 00 00 ff ff PC 0xff0000087fef2c el3 ns=1 . 00000069: 98 00 00 LAT 0 TOT . 0000006c: 71 d1 6f 21 2c 09 00 00 00 TS 39395094481 ... Other release notes: - applies to acme's perf/{core,urgent} branches, likely elsewhere - Report is self-contained within the tool. Record requires enabling the kernel SPE driver by setting CONFIG_ARM_SPE_PMU. - The intel-bts implementation was used as a starting point; its min/default/max buffer sizes and power of 2 pages granularity need to be revisited for ARM SPE - Recording across multiple SPE clusters/domains not supported - Snapshot support (record -S), and conversion to native perf events (e.g., via 'perf inject --itrace'), are also not supported - Technically both cs-etm and spe can be used simultaneously, however disabled for simplicity in this release Signed-off-by: Kim Phillips <kim.phillips@arm.com> Reviewed-by: Dongjiu Geng <gengdongjiu@huawei.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: linux-arm-kernel@lists.infradead.org Cc: Marc Zyngier <marc.zyngier@arm.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Mathieu Poirier <mathieu.poirier@linaro.org> Cc: Pawel Moll <pawel.moll@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Rob Herring <robh@kernel.org> Cc: Suzuki Poulouse <suzuki.poulose@arm.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Wang Nan <wangnan0@huawei.com> Cc: Will Deacon <will.deacon@arm.com> Link: http://lkml.kernel.org/r/20180114132850.0b127434b704a26bad13268f@arm.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/arch')
-rw-r--r--tools/perf/arch/arm/util/auxtrace.c77
-rw-r--r--tools/perf/arch/arm/util/pmu.c6
-rw-r--r--tools/perf/arch/arm64/util/Build3
-rw-r--r--tools/perf/arch/arm64/util/arm-spe.c225
4 files changed, 304 insertions, 7 deletions
diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c
index 8edf2cb71564..2323581b157d 100644
--- a/tools/perf/arch/arm/util/auxtrace.c
+++ b/tools/perf/arch/arm/util/auxtrace.c
@@ -22,6 +22,42 @@
#include "../../util/evlist.h"
#include "../../util/pmu.h"
#include "cs-etm.h"
+#include "arm-spe.h"
+
+static struct perf_pmu **find_all_arm_spe_pmus(int *nr_spes, int *err)
+{
+ struct perf_pmu **arm_spe_pmus = NULL;
+ int ret, i, nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+ /* arm_spe_xxxxxxxxx\0 */
+ char arm_spe_pmu_name[sizeof(ARM_SPE_PMU_NAME) + 10];
+
+ arm_spe_pmus = zalloc(sizeof(struct perf_pmu *) * nr_cpus);
+ if (!arm_spe_pmus) {
+ pr_err("spes alloc failed\n");
+ *err = -ENOMEM;
+ return NULL;
+ }
+
+ for (i = 0; i < nr_cpus; i++) {
+ ret = sprintf(arm_spe_pmu_name, "%s%d", ARM_SPE_PMU_NAME, i);
+ if (ret < 0) {
+ pr_err("sprintf failed\n");
+ *err = -ENOMEM;
+ return NULL;
+ }
+
+ arm_spe_pmus[*nr_spes] = perf_pmu__find(arm_spe_pmu_name);
+ if (arm_spe_pmus[*nr_spes]) {
+ pr_debug2("%s %d: arm_spe_pmu %d type %d name %s\n",
+ __func__, __LINE__, *nr_spes,
+ arm_spe_pmus[*nr_spes]->type,
+ arm_spe_pmus[*nr_spes]->name);
+ (*nr_spes)++;
+ }
+ }
+
+ return arm_spe_pmus;
+}
struct auxtrace_record
*auxtrace_record__init(struct perf_evlist *evlist, int *err)
@@ -29,22 +65,51 @@ struct auxtrace_record
struct perf_pmu *cs_etm_pmu;
struct perf_evsel *evsel;
bool found_etm = false;
+ bool found_spe = false;
+ static struct perf_pmu **arm_spe_pmus = NULL;
+ static int nr_spes = 0;
+ int i;
+
+ if (!evlist)
+ return NULL;
cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME);
- if (evlist) {
- evlist__for_each_entry(evlist, evsel) {
- if (cs_etm_pmu &&
- evsel->attr.type == cs_etm_pmu->type)
- found_etm = true;
+ if (!arm_spe_pmus)
+ arm_spe_pmus = find_all_arm_spe_pmus(&nr_spes, err);
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (cs_etm_pmu &&
+ evsel->attr.type == cs_etm_pmu->type)
+ found_etm = true;
+
+ if (!nr_spes)
+ continue;
+
+ for (i = 0; i < nr_spes; i++) {
+ if (evsel->attr.type == arm_spe_pmus[i]->type) {
+ found_spe = true;
+ break;
+ }
}
}
+ if (found_etm && found_spe) {
+ pr_err("Concurrent ARM Coresight ETM and SPE operation not currently supported\n");
+ *err = -EOPNOTSUPP;
+ return NULL;
+ }
+
if (found_etm)
return cs_etm_record_init(err);
+#if defined(__aarch64__)
+ if (found_spe)
+ return arm_spe_recording_init(err, arm_spe_pmus[i]);
+#endif
+
/*
- * Clear 'err' even if we haven't found a cs_etm event - that way perf
+ * Clear 'err' even if we haven't found an event - that way perf
* record can still be used even if tracers aren't present. The NULL
* return value will take care of telling the infrastructure HW tracing
* isn't available.
diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c
index 98d67399a0d6..ac4dffc807b8 100644
--- a/tools/perf/arch/arm/util/pmu.c
+++ b/tools/perf/arch/arm/util/pmu.c
@@ -20,6 +20,7 @@
#include <linux/perf_event.h>
#include "cs-etm.h"
+#include "arm-spe.h"
#include "../../util/pmu.h"
struct perf_event_attr
@@ -30,7 +31,12 @@ struct perf_event_attr
/* add ETM default config here */
pmu->selectable = true;
pmu->set_drv_config = cs_etm_set_drv_config;
+#if defined(__aarch64__)
+ } else if (strstarts(pmu->name, ARM_SPE_PMU_NAME)) {
+ return arm_spe_pmu_default_config(pmu);
+#endif
}
+
#endif
return NULL;
}
diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build
index e04f6cdd6f32..c0b8dfef98ba 100644
--- a/tools/perf/arch/arm64/util/Build
+++ b/tools/perf/arch/arm64/util/Build
@@ -5,4 +5,5 @@ libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
libperf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \
../../arm/util/auxtrace.o \
- ../../arm/util/cs-etm.o
+ ../../arm/util/cs-etm.o \
+ arm-spe.o
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
new file mode 100644
index 000000000000..1120e39c1b00
--- /dev/null
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Arm Statistical Profiling Extensions (SPE) support
+ * Copyright (c) 2017-2018, Arm Ltd.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+#include <time.h>
+
+#include "../../util/cpumap.h"
+#include "../../util/evsel.h"
+#include "../../util/evlist.h"
+#include "../../util/session.h"
+#include "../../util/util.h"
+#include "../../util/pmu.h"
+#include "../../util/debug.h"
+#include "../../util/auxtrace.h"
+#include "../../util/arm-spe.h"
+
+#define KiB(x) ((x) * 1024)
+#define MiB(x) ((x) * 1024 * 1024)
+
+struct arm_spe_recording {
+ struct auxtrace_record itr;
+ struct perf_pmu *arm_spe_pmu;
+ struct perf_evlist *evlist;
+};
+
+static size_t
+arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ return ARM_SPE_AUXTRACE_PRIV_SIZE;
+}
+
+static int arm_spe_info_fill(struct auxtrace_record *itr,
+ struct perf_session *session,
+ struct auxtrace_info_event *auxtrace_info,
+ size_t priv_size)
+{
+ struct arm_spe_recording *sper =
+ container_of(itr, struct arm_spe_recording, itr);
+ struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
+
+ if (priv_size != ARM_SPE_AUXTRACE_PRIV_SIZE)
+ return -EINVAL;
+
+ if (!session->evlist->nr_mmaps)
+ return -EINVAL;
+
+ auxtrace_info->type = PERF_AUXTRACE_ARM_SPE;
+ auxtrace_info->priv[ARM_SPE_PMU_TYPE] = arm_spe_pmu->type;
+
+ return 0;
+}
+
+static int arm_spe_recording_options(struct auxtrace_record *itr,
+ struct perf_evlist *evlist,
+ struct record_opts *opts)
+{
+ struct arm_spe_recording *sper =
+ container_of(itr, struct arm_spe_recording, itr);
+ struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
+ struct perf_evsel *evsel, *arm_spe_evsel = NULL;
+ bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
+ struct perf_evsel *tracking_evsel;
+ int err;
+
+ sper->evlist = evlist;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->attr.type == arm_spe_pmu->type) {
+ if (arm_spe_evsel) {
+ pr_err("There may be only one " ARM_SPE_PMU_NAME "x event\n");
+ return -EINVAL;
+ }
+ evsel->attr.freq = 0;
+ evsel->attr.sample_period = 1;
+ arm_spe_evsel = evsel;
+ opts->full_auxtrace = true;
+ }
+ }
+
+ if (!opts->full_auxtrace)
+ return 0;
+
+ /* We are in full trace mode but '-m,xyz' wasn't specified */
+ if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
+ if (privileged) {
+ opts->auxtrace_mmap_pages = MiB(4) / page_size;
+ } else {
+ opts->auxtrace_mmap_pages = KiB(128) / page_size;
+ if (opts->mmap_pages == UINT_MAX)
+ opts->mmap_pages = KiB(256) / page_size;
+ }
+ }
+
+ /* Validate auxtrace_mmap_pages */
+ if (opts->auxtrace_mmap_pages) {
+ size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
+ size_t min_sz = KiB(8);
+
+ if (sz < min_sz || !is_power_of_2(sz)) {
+ pr_err("Invalid mmap size for ARM SPE: must be at least %zuKiB and a power of 2\n",
+ min_sz / 1024);
+ return -EINVAL;
+ }
+ }
+
+
+ /*
+ * To obtain the auxtrace buffer file descriptor, the auxtrace event
+ * must come first.
+ */
+ perf_evlist__to_front(evlist, arm_spe_evsel);
+
+ perf_evsel__set_sample_bit(arm_spe_evsel, CPU);
+ perf_evsel__set_sample_bit(arm_spe_evsel, TIME);
+ perf_evsel__set_sample_bit(arm_spe_evsel, TID);
+
+ /* Add dummy event to keep tracking */
+ err = parse_events(evlist, "dummy:u", NULL);
+ if (err)
+ return err;
+
+ tracking_evsel = perf_evlist__last(evlist);
+ perf_evlist__set_tracking_event(evlist, tracking_evsel);
+
+ tracking_evsel->attr.freq = 0;
+ tracking_evsel->attr.sample_period = 1;
+ perf_evsel__set_sample_bit(tracking_evsel, TIME);
+ perf_evsel__set_sample_bit(tracking_evsel, CPU);
+ perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
+
+ return 0;
+}
+
+static u64 arm_spe_reference(struct auxtrace_record *itr __maybe_unused)
+{
+ struct timespec ts;
+
+ clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
+
+ return ts.tv_sec ^ ts.tv_nsec;
+}
+
+static void arm_spe_recording_free(struct auxtrace_record *itr)
+{
+ struct arm_spe_recording *sper =
+ container_of(itr, struct arm_spe_recording, itr);
+
+ free(sper);
+}
+
+static int arm_spe_read_finish(struct auxtrace_record *itr, int idx)
+{
+ struct arm_spe_recording *sper =
+ container_of(itr, struct arm_spe_recording, itr);
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(sper->evlist, evsel) {
+ if (evsel->attr.type == sper->arm_spe_pmu->type)
+ return perf_evlist__enable_event_idx(sper->evlist,
+ evsel, idx);
+ }
+ return -EINVAL;
+}
+
+struct auxtrace_record *arm_spe_recording_init(int *err,
+ struct perf_pmu *arm_spe_pmu)
+{
+ struct arm_spe_recording *sper;
+
+ if (!arm_spe_pmu) {
+ *err = -ENODEV;
+ return NULL;
+ }
+
+ sper = zalloc(sizeof(struct arm_spe_recording));
+ if (!sper) {
+ *err = -ENOMEM;
+ return NULL;
+ }
+
+ sper->arm_spe_pmu = arm_spe_pmu;
+ sper->itr.recording_options = arm_spe_recording_options;
+ sper->itr.info_priv_size = arm_spe_info_priv_size;
+ sper->itr.info_fill = arm_spe_info_fill;
+ sper->itr.free = arm_spe_recording_free;
+ sper->itr.reference = arm_spe_reference;
+ sper->itr.read_finish = arm_spe_read_finish;
+ sper->itr.alignment = 0;
+
+ return &sper->itr;
+}
+
+struct perf_event_attr
+*arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu)
+{
+ struct perf_event_attr *attr;
+
+ attr = zalloc(sizeof(struct perf_event_attr));
+ if (!attr) {
+ pr_err("arm_spe default config cannot allocate a perf_event_attr\n");
+ return NULL;
+ }
+
+ /*
+ * If kernel driver doesn't advertise a minimum,
+ * use max allowable by PMSIDR_EL1.INTERVAL
+ */
+ if (perf_pmu__scan_file(arm_spe_pmu, "caps/min_interval", "%llu",
+ &attr->sample_period) != 1) {
+ pr_debug("arm_spe driver doesn't advertise a min. interval. Using 4096\n");
+ attr->sample_period = 4096;
+ }
+
+ arm_spe_pmu->selectable = true;
+ arm_spe_pmu->is_uncore = false;
+
+ return attr;
+}