diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-10-17 11:47:46 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-10-17 11:47:46 -0700 |
commit | 9d9af1007bc08971953ae915d88dc9bb21344b53 (patch) | |
tree | 02090da0b271c31f19d58d80f4cff19c8ef89971 /tools/perf/util/tsc.c | |
parent | a1e16bc7d5f7ca3599d8a7f061841c93a563665e (diff) | |
parent | 744aec4df2c5b4d12af26a57d8858af2f59ef3d0 (diff) | |
download | linux-9d9af1007bc08971953ae915d88dc9bb21344b53.tar.bz2 |
Merge tag 'perf-tools-for-v5.10-2020-10-15' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tools updates from Arnaldo Carvalho de Melo:
- cgroup improvements for 'perf stat', allowing for compact
specification of events and cgroups in the command line.
- Support per thread topdown metrics in 'perf stat'.
- Support sample-read topdown metric group in 'perf record'
- Show start of latency in addition to its start in 'perf sched
latency'.
- Add min, max to 'perf script' futex-contention output, in addition to
avg.
- Allow usage of 'perf_event_attr->exclusive' attribute via the new
':e' event modifier.
- Add 'snapshot' command to 'perf record --control', using it with
Intel PT.
- Support FIFO file names as alternative options to 'perf record
--control'.
- Introduce branch history "streams", to compare 'perf record' runs
with 'perf diff' based on branch records and report hot streams.
- Support PE executable symbol tables using libbfd, to profile, for
instance, wine binaries.
- Add filter support for option 'perf ftrace -F/--funcs'.
- Allow configuring the 'disassembler_style' 'perf annotate' knob via
'perf config'
- Update CascadelakeX and SkylakeX JSON vendor events files.
- Add support for parsing perchip/percore JSON vendor events.
- Add power9 hv_24x7 core level metric events.
- Add L2 prefetch, ITLB instruction fetch hits JSON events for AMD
zen1.
- Enable Family 19h users by matching Zen2 AMD vendor events.
- Use debuginfod in 'perf probe' when required debug files not found
locally.
- Display negative tid in non-sample events in 'perf script'.
- Make GTK2 support opt-in
- Add build test with GTK+
- Add missing -lzstd to the fast path feature detection
- Add scripts to auto generate 'mmap', 'mremap' string<->id tables for
use in 'perf trace'.
- Show python test script in verbose mode.
- Fix uncore metric expressions
- Msan uninitialized use fixes.
- Use condition variables in 'perf bench numa'
- Autodetect python3 binary in systems without python2.
- Support md5 build ids in addition to sha1.
- Add build id 'perf test' regression test.
- Fix printable strings in python3 scripts.
- Fix off by ones in 'perf trace' in arches using libaudit.
- Fix JSON event code for events referencing std arch events.
- Introduce 'perf test' shell script for Arm CoreSight testing.
- Add rdtsc() for Arm64 for used in the PERF_RECORD_TIME_CONV metadata
event and in 'perf test tsc'.
- 'perf c2c' improvements: Add "RMT Load Hit" metric, "Total Stores",
fixes and documentation update.
- Fix usage of reloc_sym in 'perf probe' when using both kallsyms and
debuginfo files.
- Do not print 'Metric Groups:' unnecessarily in 'perf list'
- Refcounting fixes in the event parsing code.
- Add expand cgroup event 'perf test' entry.
- Fix out of bounds CPU map access when handling armv8_pmu events in
'perf stat'.
- Add build-id injection 'perf bench' benchmark.
- Enter namespace when reading build-id in 'perf inject'.
- Do not load map/dso when injecting build-id speeding up the 'perf
inject' process.
- Add --buildid-all option to avoid processing all samples, just the
mmap metadata events.
- Add feature test to check if libbfd has buildid support
- Add 'perf test' entry for PE binary format support.
- Fix typos in power8 PMU vendor events JSON files.
- Hide libtraceevent non API functions.
* tag 'perf-tools-for-v5.10-2020-10-15' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (113 commits)
perf c2c: Update documentation for metrics reorganization
perf c2c: Add metrics "RMT Load Hit"
perf c2c: Correct LLC load hit metrics
perf c2c: Change header for LLC local hit
perf c2c: Use more explicit headers for HITM
perf c2c: Change header from "LLC Load Hitm" to "Load Hitm"
perf c2c: Organize metrics based on memory hierarchy
perf c2c: Display "Total Stores" as a standalone metrics
perf c2c: Display the total numbers continuously
perf bench: Use condition variables in numa.
perf jevents: Fix event code for events referencing std arch events
perf diff: Support hot streams comparison
perf streams: Report hot streams
perf streams: Calculate the sum of total streams hits
perf streams: Link stream pair
perf streams: Compare two streams
perf streams: Get the evsel_streams by evsel_idx
perf streams: Introduce branch history "streams"
perf intel-pt: Improve PT documentation slightly
perf tools: Add support for exclusive groups/events
...
Diffstat (limited to 'tools/perf/util/tsc.c')
-rw-r--r-- | tools/perf/util/tsc.c | 81 |
1 files changed, 81 insertions, 0 deletions
diff --git a/tools/perf/util/tsc.c b/tools/perf/util/tsc.c index bfa782421cbd..62b4c75c966c 100644 --- a/tools/perf/util/tsc.c +++ b/tools/perf/util/tsc.c @@ -1,7 +1,16 @@ // SPDX-License-Identifier: GPL-2.0 +#include <errno.h> + #include <linux/compiler.h> +#include <linux/perf_event.h> +#include <linux/stddef.h> #include <linux/types.h> +#include <asm/barrier.h> + +#include "event.h" +#include "synthetic-events.h" +#include "debug.h" #include "tsc.h" u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc) @@ -19,12 +28,84 @@ u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc) { u64 quot, rem; + if (tc->cap_user_time_short) + cyc = tc->time_cycles + + ((cyc - tc->time_cycles) & tc->time_mask); + quot = cyc >> tc->time_shift; rem = cyc & (((u64)1 << tc->time_shift) - 1); return tc->time_zero + quot * tc->time_mult + ((rem * tc->time_mult) >> tc->time_shift); } +int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, + struct perf_tsc_conversion *tc) +{ + u32 seq; + int i = 0; + + while (1) { + seq = pc->lock; + rmb(); + tc->time_mult = pc->time_mult; + tc->time_shift = pc->time_shift; + tc->time_zero = pc->time_zero; + tc->time_cycles = pc->time_cycles; + tc->time_mask = pc->time_mask; + tc->cap_user_time_zero = pc->cap_user_time_zero; + tc->cap_user_time_short = pc->cap_user_time_short; + rmb(); + if (pc->lock == seq && !(seq & 1)) + break; + if (++i > 10000) { + pr_debug("failed to get perf_event_mmap_page lock\n"); + return -EINVAL; + } + } + + if (!tc->cap_user_time_zero) + return -EOPNOTSUPP; + + return 0; +} + +int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, + struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine) +{ + union perf_event event = { + .time_conv = { + .header = { + .type = PERF_RECORD_TIME_CONV, + .size = sizeof(struct perf_record_time_conv), + }, + }, + }; + struct perf_tsc_conversion tc; + int err; + + if (!pc) + return 0; + err = perf_read_tsc_conversion(pc, &tc); + if (err == -EOPNOTSUPP) + return 0; + if (err) + return err; + + pr_debug2("Synthesizing TSC conversion information\n"); + + event.time_conv.time_mult = tc.time_mult; + event.time_conv.time_shift = tc.time_shift; + event.time_conv.time_zero = tc.time_zero; + event.time_conv.time_cycles = tc.time_cycles; + event.time_conv.time_mask = tc.time_mask; + event.time_conv.cap_user_time_zero = tc.cap_user_time_zero; + event.time_conv.cap_user_time_short = tc.cap_user_time_short; + + return process(tool, &event, NULL, machine); +} + u64 __weak rdtsc(void) { return 0; |