diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-05-25 14:46:09 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-05-25 14:46:09 -0700 |
commit | d223575e50f8da0de358c0098defd560069ecf66 (patch) | |
tree | bf00de6a42d073b157256ca44298acbc072aa923 /tools/perf/util/intel-pt.c | |
parent | e908305fb262588471958f560eb3c6c18cc683a1 (diff) | |
parent | 5d2b6bc3a6a27ad265d2ec0d53dd7ef33bd314fc (diff) | |
download | linux-d223575e50f8da0de358c0098defd560069ecf66.tar.bz2 |
Merge tag 'perf-tools-for-v5.19-2022-05-23' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tool updates from Arnaldo Carvalho de Melo:
"Intel PT:
- Allow hardware tracing on KVM test programs. In this case, the VM
is not running an OS, but only the functions loaded into it by the
hypervisor test program, and conveniently, loaded at the same
virtual addresses.
- Improve documentation:
- Add link to perf wiki's page
- Cleanups:
- Delete now unused perf-with-kcore.sh script
- Remove unused machines__find_host()
ARM SPE (Statistical Profile Extensions):
- Add man page entry.
Vendor Events:
- Update various Intel event topics
- Update various microarch events
- Fix various cstate metrics
- Fix Alderlake metric groups
- Add sapphirerapids events
- Add JSON files for ARM Cortex A34, A35, A55, A510, A65, A73, A75,
A77, A78, A710, X1, X2 and Neoverse E1
- Update Cortex A57/A72
perf stat:
- Introduce stats for the user and system rusage times
perf c2c:
- Prep work to support ARM systems
perf annotate:
- Add --percent-limit option
perf lock:
- Add -t/--thread option for report
- Do not discard broken lock stats
perf bench:
- Add breakpoint benchmarks
perf test:
- Limit to only run executable scripts in tests
- Add basic perf record tests
- Add stat record+report test
- Add basic stat and topdown group test
- Skip several tests when the user hasn't permission to perform them
- Fix test case 81 ("perf record tests") on s390x
perf version:
- debuginfod support improvements
perf scripting python:
- Expose symbol offset and source information
perf build:
- Error for BPF skeletons without LIBBPF
- Use Python devtools for version autodetection rather than runtime
Miscellaneous:
- Add riscv64 support to 'perf jitdump'
- Various fixes/tidy ups related to cpu_map
- Fixes for handling Intel hybrid systems"
* tag 'perf-tools-for-v5.19-2022-05-23' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (122 commits)
perf intel-pt: Add guest_code support
perf kvm report: Add guest_code support
perf script: Add guest_code support
perf tools: Add guest_code support
perf tools: Factor out thread__set_guest_comm()
perf tools: Add machine to machines back pointer
perf vendors events arm64: Update Cortex A57/A72
perf vendors events arm64: Arm Neoverse E1
perf vendors events arm64: Arm Cortex-X2
perf vendors events arm64: Arm Cortex-X1
perf vendors events arm64: Arm Cortex-A710
perf vendors events arm64: Arm Cortex-A78
perf vendors events arm64: Arm Cortex-A77
perf vendors events arm64: Arm Cortex-A75
perf vendors events arm64: Arm Cortex-A73
perf vendors events arm64: Arm Cortex-A65
perf vendors events arm64: Arm Cortex-A510
perf vendors events arm64: Arm Cortex-A55
perf vendors events arm64: Arm Cortex-A35
perf vendors events arm64: Arm Cortex-A34
...
Diffstat (limited to 'tools/perf/util/intel-pt.c')
-rw-r--r-- | tools/perf/util/intel-pt.c | 57 |
1 files changed, 54 insertions, 3 deletions
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index ec43d364d0de..62b2f375a94d 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -192,6 +192,7 @@ struct intel_pt_queue {
 	pid_t next_tid;
 	struct thread *thread;
 	struct machine *guest_machine;
+	struct thread *guest_thread;
 	struct thread *unknown_guest_thread;
 	pid_t guest_machine_pid;
 	bool exclude_kernel;
@@ -530,6 +531,7 @@ struct intel_pt_cache_entry {
 	u64 byte_cnt;
 	enum intel_pt_insn_op op;
 	enum intel_pt_insn_branch branch;
+	bool emulated_ptwrite;
 	int length;
 	int32_t rel;
 	char insn[INTEL_PT_INSN_BUF_SZ];
@@ -616,6 +618,7 @@ static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
 	e->byte_cnt = byte_cnt;
 	e->op = intel_pt_insn->op;
 	e->branch = intel_pt_insn->branch;
+	e->emulated_ptwrite = intel_pt_insn->emulated_ptwrite;
 	e->length = intel_pt_insn->length;
 	e->rel = intel_pt_insn->rel;
 	memcpy(e->insn, intel_pt_insn->buf, INTEL_PT_INSN_BUF_SZ);
@@ -688,6 +691,11 @@ static int intel_pt_get_guest(struct intel_pt_queue *ptq)
 	ptq->guest_machine = NULL;
 	thread__zput(ptq->unknown_guest_thread);
 
+	if (symbol_conf.guest_code) {
+		thread__zput(ptq->guest_thread);
+		ptq->guest_thread = machines__findnew_guest_code(machines, pid);
+	}
+
 	machine = machines__find_guest(machines, pid);
 	if (!machine)
 		return -1;
@@ -702,6 +710,28 @@ static int intel_pt_get_guest(struct intel_pt_queue *ptq)
 	return 0;
 }
 
+static inline bool intel_pt_jmp_16(struct intel_pt_insn *intel_pt_insn)
+{
+	return intel_pt_insn->rel == 16 && intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL;
+}
+
+#define PTWRITE_MAGIC "\x0f\x0bperf,ptwrite "
+#define PTWRITE_MAGIC_LEN 16
+
+static bool intel_pt_emulated_ptwrite(struct dso *dso, struct machine *machine, u64 offset)
+{
+	unsigned char buf[PTWRITE_MAGIC_LEN];
+	ssize_t len;
+
+	len = dso__data_read_offset(dso, machine, offset, buf, PTWRITE_MAGIC_LEN);
+	if (len == PTWRITE_MAGIC_LEN && !memcmp(buf, PTWRITE_MAGIC, PTWRITE_MAGIC_LEN)) {
+		intel_pt_log("Emulated ptwrite signature found\n");
+		return true;
+	}
+	intel_pt_log("Emulated ptwrite signature not found\n");
+	return false;
+}
+
 static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
 				   uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
 				   uint64_t max_insn_cnt,
@@ -729,11 +759,16 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
 	cpumode = intel_pt_nr_cpumode(ptq, *ip, nr);
 
 	if (nr) {
-		if (cpumode != PERF_RECORD_MISC_GUEST_KERNEL ||
+		if ((!symbol_conf.guest_code && cpumode != PERF_RECORD_MISC_GUEST_KERNEL) ||
 		    intel_pt_get_guest(ptq))
 			return -EINVAL;
 		machine = ptq->guest_machine;
-		thread = ptq->unknown_guest_thread;
+		thread = ptq->guest_thread;
+		if (!thread) {
+			if (cpumode != PERF_RECORD_MISC_GUEST_KERNEL)
+				return -EINVAL;
+			thread = ptq->unknown_guest_thread;
+		}
 	} else {
 		thread = ptq->thread;
 		if (!thread) {
@@ -764,6 +799,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
 				*ip += e->byte_cnt;
 				intel_pt_insn->op = e->op;
 				intel_pt_insn->branch = e->branch;
+				intel_pt_insn->emulated_ptwrite = e->emulated_ptwrite;
 				intel_pt_insn->length = e->length;
 				intel_pt_insn->rel = e->rel;
 				memcpy(intel_pt_insn->buf, e->insn,
@@ -795,8 +831,18 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
 
 			insn_cnt += 1;
 
-			if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH)
+			if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH) {
+				bool eptw;
+				u64 offs;
+
+				if (!intel_pt_jmp_16(intel_pt_insn))
+					goto out;
+				/* Check for emulated ptwrite */
+				offs = offset + intel_pt_insn->length;
+				eptw = intel_pt_emulated_ptwrite(al.map->dso, machine, offs);
+				intel_pt_insn->emulated_ptwrite = eptw;
 				goto out;
+			}
 
 			if (max_insn_cnt && insn_cnt >= max_insn_cnt)
 				goto out_no_cache;
@@ -1300,6 +1346,7 @@ static void intel_pt_free_queue(void *priv)
 	if (!ptq)
 		return;
 	thread__zput(ptq->thread);
+	thread__zput(ptq->guest_thread);
 	thread__zput(ptq->unknown_guest_thread);
 	intel_pt_decoder_free(ptq->decoder);
 	zfree(&ptq->event_buf);
@@ -2372,6 +2419,10 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
 		ptq->sample_ipc = ptq->state->flags & INTEL_PT_SAMPLE_IPC;
 	}
 
+	/* Ensure guest code maps are set up */
+	if (symbol_conf.guest_code && (state->from_nr || state->to_nr))
+		intel_pt_get_guest(ptq);
+
 	/*
 	 * Do PEBS first to allow for the possibility that the PEBS timestamp
 	 * precedes the current timestamp.