diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-09-05 11:56:18 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-09-05 11:56:18 -0700 |
commit | 27151f177827d478508e756c7657273261aaf8a9 (patch) | |
tree | 393ee6f1b12842c87461c471c0da70e4ecd93da8 /tools/perf/arch | |
parent | 58ca24158758f1784400d32743373d7d6227d018 (diff) | |
parent | c7a3828d98db2730079265b5f51933dfcef8bb5f (diff) | |
download | linux-27151f177827d478508e756c7657273261aaf8a9.tar.bz2 |
Merge tag 'perf-tools-for-v5.15-2021-09-04' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tool updates from Arnaldo Carvalho de Melo:
"New features:
- Improvements for the flamegraph python script, including:
- Display perf.data header
- Display PIDs of user stacks
- Added option to change color scheme
- Default to blue/green color scheme to improve accessibility
- Correctly identify kernel stacks when debuginfo is available
- Improvements for 'perf bench futex':
- Add --mlockall parameter
- Add --broadcast and --pi to the 'requeue' sub benchmark
- Add support for PMU aliases.
- Introduce an ARM Coresight ETE decoder.
- Add a 'perf bench' entry for evlist open/close operations, to help
quantify improvements with multithreading 'perf record'.
- Allow reporting the [un]throttle PERF_RECORD_ meta event in 'perf
script's python scripting.
- Add a 'perf test' entry for PMU aliases.
- Add a 'perf test' entry for 'perf record/perf report/perf script'
pipe mode.
Fixes:
- perf script dlfilter (API for filtering via dynamically loaded
shared object introduced in v5.14) fixes and a 'perf test' entry
for it.
- Fix get_current_dir_name() compilation on Android.
- Fix issues with asciidoc and double dashes uses.
- Fix memory leaks in the BTF handling code.
- Fix leftover problems in the Documentation from the infrastructure
originally lifted from the git codebase.
- Fix *probe_vfs_getname.sh 'perf test' failures.
- Handle fd gaps in 'perf test's test__dso_data_reopen().
- Make sure to show disasembly warnings for 'perf annotate --stdio'.
- Fix output from pipe to file and vice-versa in 'perf
record/report/script'.
- Correct 'perf data -h' output.
- Fix wrong comm in system-wide mode with 'perf record --delay'.
- Do not allow --for-each-cgroup without cpu in 'perf stat'
- Make 'perf test --skip' work on shell tests.
- Fix libperf's verbose printing.
Misc improvements:
- Preparatory patches for multithreading various 'perf record' phases
(synthesizing, opening, recording, etc).
- Add sparse context/locking annotations in compiler-types.h, also to
help with the multithreading effort.
- Optimize the generation of the arch specific erno tables used in
'perf trace'.
- Optimize libperf's perf_cpu_map__max().
- Improve ARM's CoreSight warnings.
- Report collisions in AUX records.
- Improve warnings for the LLVM 'perf test' entry.
- Improve the PMU events 'perf test' codebase.
- perf test: Do not compare overheads in the zstd comp test
- Better support annotation on ARM.
- Update 'perf trace's cmd string table to decode sys_bpf() first
arg.
Vendor events:
- Add JSON events and metrics for Intel's Ice Lake, Tiger Lake and
Elhart Lake.
- Update JSON eventsand metrics for Intel's Cascade Lake and Sky Lake
servers.
Hardware tracing:
- Improvements for the ARM hardware tracing auxtrace support"
* tag 'perf-tools-for-v5.15-2021-09-04' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (130 commits)
perf tests: Add test for PMU aliases
perf pmu: Add PMU alias support
perf session: Report collisions in AUX records
perf script python: Allow reporting the [un]throttle PERF_RECORD_ meta event
perf build: Report failure for testing feature libopencsd
perf cs-etm: Show a warning for an unknown magic number
perf cs-etm: Print the decoder name
perf cs-etm: Create ETE decoder
perf cs-etm: Update OpenCSD decoder for ETE
perf cs-etm: Fix typo
perf cs-etm: Save TRCDEVARCH register
perf cs-etm: Refactor out ETMv4 header saving
perf cs-etm: Initialise architecture based on TRCIDR1
perf cs-etm: Refactor initialisation of decoder params.
tools build: Fix feature detect clean for out of source builds
perf evlist: Add evlist__for_each_entry_from() macro
perf evsel: Handle precise_ip fallback in evsel__open_cpu()
perf evsel: Move bpf_counter__install_pe() to success path in evsel__open_cpu()
perf evsel: Move test_attr__open() to success path in evsel__open_cpu()
perf evsel: Move ignore_missing_thread() to fallback code
...
Diffstat (limited to 'tools/perf/arch')
-rw-r--r-- | tools/perf/arch/arm/util/auxtrace.c | 32 | ||||
-rw-r--r-- | tools/perf/arch/arm/util/cs-etm.c | 97 | ||||
-rw-r--r-- | tools/perf/arch/x86/util/pmu.c | 155 |
3 files changed, 254 insertions, 30 deletions
diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c index b187bddbd01a..c7c7ec0812d5 100644 --- a/tools/perf/arch/arm/util/auxtrace.c +++ b/tools/perf/arch/arm/util/auxtrace.c @@ -107,3 +107,35 @@ struct auxtrace_record *err = 0; return NULL; } + +#if defined(__arm__) +u64 compat_auxtrace_mmap__read_head(struct auxtrace_mmap *mm) +{ + struct perf_event_mmap_page *pc = mm->userpg; + u64 result; + + __asm__ __volatile__( +" ldrd %0, %H0, [%1]" + : "=&r" (result) + : "r" (&pc->aux_head), "Qo" (pc->aux_head) + ); + + return result; +} + +int compat_auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail) +{ + struct perf_event_mmap_page *pc = mm->userpg; + + /* Ensure all reads are done before we write the tail out */ + smp_mb(); + + __asm__ __volatile__( +" strd %2, %H2, [%1]" + : "=Qo" (pc->aux_tail) + : "r" (&pc->aux_tail), "r" (tail) + ); + + return 0; +} +#endif diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 85168d87b2d7..515aae470e23 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -47,15 +47,17 @@ static const char *metadata_etmv3_ro[CS_ETM_PRIV_MAX] = { [CS_ETM_ETMIDR] = "mgmt/etmidr", }; -static const char *metadata_etmv4_ro[CS_ETMV4_PRIV_MAX] = { +static const char * const metadata_etmv4_ro[] = { [CS_ETMV4_TRCIDR0] = "trcidr/trcidr0", [CS_ETMV4_TRCIDR1] = "trcidr/trcidr1", [CS_ETMV4_TRCIDR2] = "trcidr/trcidr2", [CS_ETMV4_TRCIDR8] = "trcidr/trcidr8", [CS_ETMV4_TRCAUTHSTATUS] = "mgmt/trcauthstatus", + [CS_ETE_TRCDEVARCH] = "mgmt/trcdevarch" }; static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu); +static bool cs_etm_is_ete(struct auxtrace_record *itr, int cpu); static int cs_etm_set_context_id(struct auxtrace_record *itr, struct evsel *evsel, int cpu) @@ -73,7 +75,7 @@ static int cs_etm_set_context_id(struct auxtrace_record *itr, if (!cs_etm_is_etmv4(itr, cpu)) goto out; - /* Get a handle on TRCIRD2 */ + /* Get a handle on TRCIDR2 */ snprintf(path, PATH_MAX, "cpu%d/%s", cpu, metadata_etmv4_ro[CS_ETMV4_TRCIDR2]); err = perf_pmu__scan_file(cs_etm_pmu, path, "%x", &val); @@ -533,7 +535,7 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, struct evlist *evlist __maybe_unused) { int i; - int etmv3 = 0, etmv4 = 0; + int etmv3 = 0, etmv4 = 0, ete = 0; struct perf_cpu_map *event_cpus = evlist->core.cpus; struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL); @@ -544,7 +546,9 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, !cpu_map__has(online_cpus, i)) continue; - if (cs_etm_is_etmv4(itr, i)) + if (cs_etm_is_ete(itr, i)) + ete++; + else if (cs_etm_is_etmv4(itr, i)) etmv4++; else etmv3++; @@ -555,7 +559,9 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, if (!cpu_map__has(online_cpus, i)) continue; - if (cs_etm_is_etmv4(itr, i)) + if (cs_etm_is_ete(itr, i)) + ete++; + else if (cs_etm_is_etmv4(itr, i)) etmv4++; else etmv3++; @@ -565,6 +571,7 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, perf_cpu_map__put(online_cpus); return (CS_ETM_HEADER_SIZE + + (ete * CS_ETE_PRIV_SIZE) + (etmv4 * CS_ETMV4_PRIV_SIZE) + (etmv3 * CS_ETMV3_PRIV_SIZE)); } @@ -607,6 +614,49 @@ static int cs_etm_get_ro(struct perf_pmu *pmu, int cpu, const char *path) return val; } +#define TRCDEVARCH_ARCHPART_SHIFT 0 +#define TRCDEVARCH_ARCHPART_MASK GENMASK(11, 0) +#define TRCDEVARCH_ARCHPART(x) (((x) & TRCDEVARCH_ARCHPART_MASK) >> TRCDEVARCH_ARCHPART_SHIFT) + +#define TRCDEVARCH_ARCHVER_SHIFT 12 +#define TRCDEVARCH_ARCHVER_MASK GENMASK(15, 12) +#define TRCDEVARCH_ARCHVER(x) (((x) & TRCDEVARCH_ARCHVER_MASK) >> TRCDEVARCH_ARCHVER_SHIFT) + +static bool cs_etm_is_ete(struct auxtrace_record *itr, int cpu) +{ + struct cs_etm_recording *ptr = container_of(itr, struct cs_etm_recording, itr); + struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; + int trcdevarch = cs_etm_get_ro(cs_etm_pmu, cpu, metadata_etmv4_ro[CS_ETE_TRCDEVARCH]); + + /* + * ETE if ARCHVER is 5 (ARCHVER is 4 for ETM) and ARCHPART is 0xA13. + * See ETM_DEVARCH_ETE_ARCH in coresight-etm4x.h + */ + return TRCDEVARCH_ARCHVER(trcdevarch) == 5 && TRCDEVARCH_ARCHPART(trcdevarch) == 0xA13; +} + +static void cs_etm_save_etmv4_header(__u64 data[], struct auxtrace_record *itr, int cpu) +{ + struct cs_etm_recording *ptr = container_of(itr, struct cs_etm_recording, itr); + struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; + + /* Get trace configuration register */ + data[CS_ETMV4_TRCCONFIGR] = cs_etmv4_get_config(itr); + /* Get traceID from the framework */ + data[CS_ETMV4_TRCTRACEIDR] = coresight_get_trace_id(cpu); + /* Get read-only information from sysFS */ + data[CS_ETMV4_TRCIDR0] = cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_etmv4_ro[CS_ETMV4_TRCIDR0]); + data[CS_ETMV4_TRCIDR1] = cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_etmv4_ro[CS_ETMV4_TRCIDR1]); + data[CS_ETMV4_TRCIDR2] = cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_etmv4_ro[CS_ETMV4_TRCIDR2]); + data[CS_ETMV4_TRCIDR8] = cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_etmv4_ro[CS_ETMV4_TRCIDR8]); + data[CS_ETMV4_TRCAUTHSTATUS] = cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_etmv4_ro[CS_ETMV4_TRCAUTHSTATUS]); +} + static void cs_etm_get_metadata(int cpu, u32 *offset, struct auxtrace_record *itr, struct perf_record_auxtrace_info *info) @@ -618,31 +668,20 @@ static void cs_etm_get_metadata(int cpu, u32 *offset, struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; /* first see what kind of tracer this cpu is affined to */ - if (cs_etm_is_etmv4(itr, cpu)) { - magic = __perf_cs_etmv4_magic; - /* Get trace configuration register */ - info->priv[*offset + CS_ETMV4_TRCCONFIGR] = - cs_etmv4_get_config(itr); - /* Get traceID from the framework */ - info->priv[*offset + CS_ETMV4_TRCTRACEIDR] = - coresight_get_trace_id(cpu); - /* Get read-only information from sysFS */ - info->priv[*offset + CS_ETMV4_TRCIDR0] = - cs_etm_get_ro(cs_etm_pmu, cpu, - metadata_etmv4_ro[CS_ETMV4_TRCIDR0]); - info->priv[*offset + CS_ETMV4_TRCIDR1] = + if (cs_etm_is_ete(itr, cpu)) { + magic = __perf_cs_ete_magic; + /* ETE uses the same registers as ETMv4 plus TRCDEVARCH */ + cs_etm_save_etmv4_header(&info->priv[*offset], itr, cpu); + info->priv[*offset + CS_ETE_TRCDEVARCH] = cs_etm_get_ro(cs_etm_pmu, cpu, - metadata_etmv4_ro[CS_ETMV4_TRCIDR1]); - info->priv[*offset + CS_ETMV4_TRCIDR2] = - cs_etm_get_ro(cs_etm_pmu, cpu, - metadata_etmv4_ro[CS_ETMV4_TRCIDR2]); - info->priv[*offset + CS_ETMV4_TRCIDR8] = - cs_etm_get_ro(cs_etm_pmu, cpu, - metadata_etmv4_ro[CS_ETMV4_TRCIDR8]); - info->priv[*offset + CS_ETMV4_TRCAUTHSTATUS] = - cs_etm_get_ro(cs_etm_pmu, cpu, - metadata_etmv4_ro - [CS_ETMV4_TRCAUTHSTATUS]); + metadata_etmv4_ro[CS_ETE_TRCDEVARCH]); + + /* How much space was used */ + increment = CS_ETE_PRIV_MAX; + nr_trc_params = CS_ETE_PRIV_MAX - CS_ETM_COMMON_BLK_MAX_V1; + } else if (cs_etm_is_etmv4(itr, cpu)) { + magic = __perf_cs_etmv4_magic; + cs_etm_save_etmv4_header(&info->priv[*offset], itr, cpu); /* How much space was used */ increment = CS_ETMV4_PRIV_MAX; diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c index d48d608517fd..74d69db1ea99 100644 --- a/tools/perf/arch/x86/util/pmu.c +++ b/tools/perf/arch/x86/util/pmu.c @@ -1,12 +1,30 @@ // SPDX-License-Identifier: GPL-2.0 #include <string.h> - +#include <stdio.h> +#include <sys/types.h> +#include <dirent.h> +#include <fcntl.h> #include <linux/stddef.h> #include <linux/perf_event.h> +#include <linux/zalloc.h> +#include <api/fs/fs.h> +#include <errno.h> #include "../../../util/intel-pt.h" #include "../../../util/intel-bts.h" #include "../../../util/pmu.h" +#include "../../../util/fncache.h" + +#define TEMPLATE_ALIAS "%s/bus/event_source/devices/%s/alias" + +struct pmu_alias { + char *name; + char *alias; + struct list_head list; +}; + +static LIST_HEAD(pmu_alias_name_list); +static bool cached_list; struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) { @@ -18,3 +36,138 @@ struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __mayb #endif return NULL; } + +static void pmu_alias__delete(struct pmu_alias *pmu_alias) +{ + if (!pmu_alias) + return; + + zfree(&pmu_alias->name); + zfree(&pmu_alias->alias); + free(pmu_alias); +} + +static struct pmu_alias *pmu_alias__new(char *name, char *alias) +{ + struct pmu_alias *pmu_alias = zalloc(sizeof(*pmu_alias)); + + if (pmu_alias) { + pmu_alias->name = strdup(name); + if (!pmu_alias->name) + goto out_delete; + + pmu_alias->alias = strdup(alias); + if (!pmu_alias->alias) + goto out_delete; + } + return pmu_alias; + +out_delete: + pmu_alias__delete(pmu_alias); + return NULL; +} + +static int setup_pmu_alias_list(void) +{ + char path[PATH_MAX]; + DIR *dir; + struct dirent *dent; + const char *sysfs = sysfs__mountpoint(); + struct pmu_alias *pmu_alias; + char buf[MAX_PMU_NAME_LEN]; + FILE *file; + int ret = -ENOMEM; + + if (!sysfs) + return -1; + + snprintf(path, PATH_MAX, + "%s" EVENT_SOURCE_DEVICE_PATH, sysfs); + + dir = opendir(path); + if (!dir) + return -errno; + + while ((dent = readdir(dir))) { + if (!strcmp(dent->d_name, ".") || + !strcmp(dent->d_name, "..")) + continue; + + snprintf(path, PATH_MAX, + TEMPLATE_ALIAS, sysfs, dent->d_name); + + if (!file_available(path)) + continue; + + file = fopen(path, "r"); + if (!file) + continue; + + if (!fgets(buf, sizeof(buf), file)) { + fclose(file); + continue; + } + + fclose(file); + + /* Remove the last '\n' */ + buf[strlen(buf) - 1] = 0; + + pmu_alias = pmu_alias__new(dent->d_name, buf); + if (!pmu_alias) + goto close_dir; + + list_add_tail(&pmu_alias->list, &pmu_alias_name_list); + } + + ret = 0; + +close_dir: + closedir(dir); + return ret; +} + +static char *__pmu_find_real_name(const char *name) +{ + struct pmu_alias *pmu_alias; + + list_for_each_entry(pmu_alias, &pmu_alias_name_list, list) { + if (!strcmp(name, pmu_alias->alias)) + return pmu_alias->name; + } + + return (char *)name; +} + +char *pmu_find_real_name(const char *name) +{ + if (cached_list) + return __pmu_find_real_name(name); + + setup_pmu_alias_list(); + cached_list = true; + + return __pmu_find_real_name(name); +} + +static char *__pmu_find_alias_name(const char *name) +{ + struct pmu_alias *pmu_alias; + + list_for_each_entry(pmu_alias, &pmu_alias_name_list, list) { + if (!strcmp(name, pmu_alias->name)) + return pmu_alias->alias; + } + return NULL; +} + +char *pmu_find_alias_name(const char *name) +{ + if (cached_list) + return __pmu_find_alias_name(name); + + setup_pmu_alias_list(); + cached_list = true; + + return __pmu_find_alias_name(name); +} |