diff options
Diffstat (limited to 'tools')
38 files changed, 8858 insertions, 21 deletions
diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index faca2bf6a430..8120af9c0341 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -57,6 +57,8 @@ quiet_cmd_cc_i_c = CPP $@ quiet_cmd_cc_s_c = AS $@ cmd_cc_s_c = $(CC) $(c_flags) -S -o $@ $< +quiet_cmd_gen = GEN $@ + # Link agregate command # If there's nothing to link, create empty $@ object. quiet_cmd_ld_multi = LD $@ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 1c0d69f44552..74ca42093d70 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -70,8 +70,13 @@ test-libelf.bin: test-glibc.bin: $(BUILD) +DWARFLIBS := -ldw +ifeq ($(findstring -static,${LDFLAGS}),-static) +DWARFLIBS += -lelf -lebl -lz -llzma -lbz2 +endif + test-dwarf.bin: - $(BUILD) -ldw + $(BUILD) $(DWARFLIBS) test-libelf-mmap.bin: $(BUILD) -lelf diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index 09db62ba5786..3d1bb802dbf4 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -29,3 +29,4 @@ config.mak.autogen *.pyc *.pyo .config-detected +util/intel-pt-decoder/inat-tables.c diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt new file mode 100644 index 000000000000..2866b62eb293 --- /dev/null +++ b/tools/perf/Documentation/intel-pt.txt @@ -0,0 +1,588 @@ +Intel Processor Trace +===================== + +Overview +======== + +Intel Processor Trace (Intel PT) is an extension of Intel Architecture that +collects information about software execution such as control flow, execution +modes and timings and formats it into highly compressed binary packets. +Technical details are documented in the Intel 64 and IA-32 Architectures +Software Developer Manuals, Chapter 36 Intel Processor Trace. + +Intel PT is first supported in Intel Core M and 5th generation Intel Core +processors that are based on the Intel micro-architecture code name Broadwell. + +Trace data is collected by 'perf record' and stored within the perf.data file. +See below for options to 'perf record'. + +Trace data must be 'decoded' which involves walking the object code and matching +the trace data packets. For example a TNT packet only tells whether a +conditional branch was taken or not taken, so to make use of that packet the +decoder must know precisely which instruction was being executed. + +Decoding is done on-the-fly. The decoder outputs samples in the same format as +samples output by perf hardware events, for example as though the "instructions" +or "branches" events had been recorded. Presently 3 tools support this: +'perf script', 'perf report' and 'perf inject'. See below for more information +on using those tools. + +The main distinguishing feature of Intel PT is that the decoder can determine +the exact flow of software execution. Intel PT can be used to understand why +and how did software get to a certain point, or behave a certain way. The +software does not have to be recompiled, so Intel PT works with debug or release +builds, however the executed images are needed - which makes use in JIT-compiled +environments, or with self-modified code, a challenge. Also symbols need to be +provided to make sense of addresses. + +A limitation of Intel PT is that it produces huge amounts of trace data +(hundreds of megabytes per second per core) which takes a long time to decode, +for example two or three orders of magnitude longer than it took to collect. +Another limitation is the performance impact of tracing, something that will +vary depending on the use-case and architecture. + + +Quickstart +========== + +It is important to start small. That is because it is easy to capture vastly +more data than can possibly be processed. + +The simplest thing to do with Intel PT is userspace profiling of small programs. +Data is captured with 'perf record' e.g. to trace 'ls' userspace-only: + + perf record -e intel_pt//u ls + +And profiled with 'perf report' e.g. + + perf report + +To also trace kernel space presents a problem, namely kernel self-modifying +code. A fairly good kernel image is available in /proc/kcore but to get an +accurate image a copy of /proc/kcore needs to be made under the same conditions +as the data capture. A script perf-with-kcore can do that, but beware that the +script makes use of 'sudo' to copy /proc/kcore. If you have perf installed +locally from the source tree you can do: + + ~/libexec/perf-core/perf-with-kcore record pt_ls -e intel_pt// -- ls + +which will create a directory named 'pt_ls' and put the perf.data file and +copies of /proc/kcore, /proc/kallsyms and /proc/modules into it. Then to use +'perf report' becomes: + + ~/libexec/perf-core/perf-with-kcore report pt_ls + +Because samples are synthesized after-the-fact, the sampling period can be +selected for reporting. e.g. sample every microsecond + + ~/libexec/perf-core/perf-with-kcore report pt_ls --itrace=i1usge + +See the sections below for more information about the --itrace option. + +Beware the smaller the period, the more samples that are produced, and the +longer it takes to process them. + +Also note that the coarseness of Intel PT timing information will start to +distort the statistical value of the sampling as the sampling period becomes +smaller. + +To represent software control flow, "branches" samples are produced. By default +a branch sample is synthesized for every single branch. To get an idea what +data is available you can use the 'perf script' tool with no parameters, which +will list all the samples. + + perf record -e intel_pt//u ls + perf script + +An interesting field that is not printed by default is 'flags' which can be +displayed as follows: + + perf script -Fcomm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr,symoff,flags + +The flags are "bcrosyiABEx" which stand for branch, call, return, conditional, +system, asynchronous, interrupt, transaction abort, trace begin, trace end, and +in transaction, respectively. + +While it is possible to create scripts to analyze the data, an alternative +approach is available to export the data to a postgresql database. Refer to +script export-to-postgresql.py for more details, and to script +call-graph-from-postgresql.py for an example of using the database. + +As mentioned above, it is easy to capture too much data. One way to limit the +data captured is to use 'snapshot' mode which is explained further below. +Refer to 'new snapshot option' and 'Intel PT modes of operation' further below. + +Another problem that will be experienced is decoder errors. They can be caused +by inability to access the executed image, self-modified or JIT-ed code, or the +inability to match side-band information (such as context switches and mmaps) +which results in the decoder not knowing what code was executed. + +There is also the problem of perf not being able to copy the data fast enough, +resulting in data lost because the buffer was full. See 'Buffer handling' below +for more details. + + +perf record +=========== + +new event +--------- + +The Intel PT kernel driver creates a new PMU for Intel PT. PMU events are +selected by providing the PMU name followed by the "config" separated by slashes. +An enhancement has been made to allow default "config" e.g. the option + + -e intel_pt// + +will use a default config value. Currently that is the same as + + -e intel_pt/tsc,noretcomp=0/ + +which is the same as + + -e intel_pt/tsc=1,noretcomp=0/ + +The config terms are listed in /sys/devices/intel_pt/format. They are bit +fields within the config member of the struct perf_event_attr which is +passed to the kernel by the perf_event_open system call. They correspond to bit +fields in the IA32_RTIT_CTL MSR. Here is a list of them and their definitions: + + $ for f in `ls /sys/devices/intel_pt/format`;do + > echo $f + > cat /sys/devices/intel_pt/format/$f + > done + noretcomp + config:11 + tsc + config:10 + +Note that the default config must be overridden for each term i.e. + + -e intel_pt/noretcomp=0/ + +is the same as: + + -e intel_pt/tsc=1,noretcomp=0/ + +So, to disable TSC packets use: + + -e intel_pt/tsc=0/ + +It is also possible to specify the config value explicitly: + + -e intel_pt/config=0x400/ + +Note that, as with all events, the event is suffixed with event modifiers: + + u userspace + k kernel + h hypervisor + G guest + H host + p precise ip + +'h', 'G' and 'H' are for virtualization which is not supported by Intel PT. +'p' is also not relevant to Intel PT. So only options 'u' and 'k' are +meaningful for Intel PT. + +perf_event_attr is displayed if the -vv option is used e.g. + + ------------------------------------------------------------ + perf_event_attr: + type 6 + size 112 + config 0x400 + { sample_period, sample_freq } 1 + sample_type IP|TID|TIME|CPU|IDENTIFIER + read_format ID + disabled 1 + inherit 1 + exclude_kernel 1 + exclude_hv 1 + enable_on_exec 1 + sample_id_all 1 + ------------------------------------------------------------ + sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 + ------------------------------------------------------------ + + +new snapshot option +------------------- + +To select snapshot mode a new option has been added: + + -S + +Optionally it can be followed by the snapshot size e.g. + + -S0x100000 + +The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size +nor snapshot size is specified, then the default is 4MiB for privileged users +(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users. +If an unprivileged user does not specify mmap pages, the mmap pages will be +reduced as described in the 'new auxtrace mmap size option' section below. + +The snapshot size is displayed if the option -vv is used e.g. + + Intel PT snapshot size: %zu + + +new auxtrace mmap size option +--------------------------- + +Intel PT buffer size is specified by an addition to the -m option e.g. + + -m,16 + +selects a buffer size of 16 pages i.e. 64KiB. + +Note that the existing functionality of -m is unchanged. The auxtrace mmap size +is specified by the optional addition of a comma and the value. + +The default auxtrace mmap size for Intel PT is 4MiB/page_size for privileged users +(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users. +If an unprivileged user does not specify mmap pages, the mmap pages will be +reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the +user is likely to get an error as they exceed their mlock limit (Max locked +memory as shown in /proc/self/limits). Note that perf does not count the first +512KiB (actually /proc/sys/kernel/perf_event_mlock_kb minus 1 page) per cpu +against the mlock limit so an unprivileged user is allowed 512KiB per cpu plus +their mlock limit (which defaults to 64KiB but is not multiplied by the number +of cpus). + +In full-trace mode, powers of two are allowed for buffer size, with a minimum +size of 2 pages. In snapshot mode, it is the same but the minimum size is +1 page. + +The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g. + + mmap length 528384 + auxtrace mmap length 4198400 + + +Intel PT modes of operation +--------------------------- + +Intel PT can be used in 2 modes: + full-trace mode + snapshot mode + +Full-trace mode traces continuously e.g. + + perf record -e intel_pt//u uname + +Snapshot mode captures the available data when a signal is sent e.g. + + perf record -v -e intel_pt//u -S ./loopy 1000000000 & + [1] 11435 + kill -USR2 11435 + Recording AUX area tracing snapshot + +Note that the signal sent is SIGUSR2. +Note that "Recording AUX area tracing snapshot" is displayed because the -v +option is used. + +The 2 modes cannot be used together. + + +Buffer handling +--------------- + +There may be buffer limitations (i.e. single ToPa entry) which means that actual +buffer sizes are limited to powers of 2 up to 4MiB (MAX_ORDER). In order to +provide other sizes, and in particular an arbitrarily large size, multiple +buffers are logically concatenated. However an interrupt must be used to switch +between buffers. That has two potential problems: + a) the interrupt may not be handled in time so that the current buffer + becomes full and some trace data is lost. + b) the interrupts may slow the system and affect the performance + results. + +If trace data is lost, the driver sets 'truncated' in the PERF_RECORD_AUX event +which the tools report as an error. + +In full-trace mode, the driver waits for data to be copied out before allowing +the (logical) buffer to wrap-around. If data is not copied out quickly enough, +again 'truncated' is set in the PERF_RECORD_AUX event. If the driver has to +wait, the intel_pt event gets disabled. Because it is difficult to know when +that happens, perf tools always re-enable the intel_pt event after copying out +data. + + +Intel PT and build ids +---------------------- + +By default "perf record" post-processes the event stream to find all build ids +for executables for all addresses sampled. Deliberately, Intel PT is not +decoded for that purpose (it would take too long). Instead the build ids for +all executables encountered (due to mmap, comm or task events) are included +in the perf.data file. + +To see buildids included in the perf.data file use the command: + + perf buildid-list + +If the perf.data file contains Intel PT data, that is the same as: + + perf buildid-list --with-hits + + +Snapshot mode and event disabling +--------------------------------- + +In order to make a snapshot, the intel_pt event is disabled using an IOCTL, +namely PERF_EVENT_IOC_DISABLE. However doing that can also disable the +collection of side-band information. In order to prevent that, a dummy +software event has been introduced that permits tracking events (like mmaps) to +continue to be recorded while intel_pt is disabled. That is important to ensure +there is complete side-band information to allow the decoding of subsequent +snapshots. + +A test has been created for that. To find the test: + + perf test list + ... + 23: Test using a dummy software event to keep tracking + +To run the test: + + perf test 23 + 23: Test using a dummy software event to keep tracking : Ok + + +perf record modes (nothing new here) +------------------------------------ + +perf record essentially operates in one of three modes: + per thread + per cpu + workload only + +"per thread" mode is selected by -t or by --per-thread (with -p or -u or just a +workload). +"per cpu" is selected by -C or -a. +"workload only" mode is selected by not using the other options but providing a +command to run (i.e. the workload). + +In per-thread mode an exact list of threads is traced. There is no inheritance. +Each thread has its own event buffer. + +In per-cpu mode all processes (or processes from the selected cgroup i.e. -G +option, or processes selected with -p or -u) are traced. Each cpu has its own +buffer. Inheritance is allowed. + +In workload-only mode, the workload is traced but with per-cpu buffers. +Inheritance is allowed. Note that you can now trace a workload in per-thread +mode by using the --per-thread option. + + +Privileged vs non-privileged users +---------------------------------- + +Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users +have memory limits imposed upon them. That affects what buffer sizes they can +have as outlined above. + +Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are +not permitted to use tracepoints which means there is insufficient side-band +information to decode Intel PT in per-cpu mode, and potentially workload-only +mode too if the workload creates new processes. + +Note also, that to use tracepoints, read-access to debugfs is required. So if +debugfs is not mounted or the user does not have read-access, it will again not +be possible to decode Intel PT in per-cpu mode. + + +sched_switch tracepoint +----------------------- + +The sched_switch tracepoint is used to provide side-band data for Intel PT +decoding. sched_switch events are automatically added. e.g. the second event +shown below + + $ perf record -vv -e intel_pt//u uname + ------------------------------------------------------------ + perf_event_attr: + type 6 + size 112 + config 0x400 + { sample_period, sample_freq } 1 + sample_type IP|TID|TIME|CPU|IDENTIFIER + read_format ID + disabled 1 + inherit 1 + exclude_kernel 1 + exclude_hv 1 + enable_on_exec 1 + sample_id_all 1 + ------------------------------------------------------------ + sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 + ------------------------------------------------------------ + perf_event_attr: + type 2 + size 112 + config 0x108 + { sample_period, sample_freq } 1 + sample_type IP|TID|TIME|CPU|PERIOD|RAW|IDENTIFIER + read_format ID + inherit 1 + sample_id_all 1 + exclude_guest 1 + ------------------------------------------------------------ + sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8 + sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8 + sys_perf_event_open: pid -1 cpu 2 group_fd -1 flags 0x8 + sys_perf_event_open: pid -1 cpu 3 group_fd -1 flags 0x8 + ------------------------------------------------------------ + perf_event_attr: + type 1 + size 112 + config 0x9 + { sample_period, sample_freq } 1 + sample_type IP|TID|TIME|IDENTIFIER + read_format ID + disabled 1 + inherit 1 + exclude_kernel 1 + exclude_hv 1 + mmap 1 + comm 1 + enable_on_exec 1 + task 1 + sample_id_all 1 + mmap2 1 + comm_exec 1 + ------------------------------------------------------------ + sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 + mmap size 528384B + AUX area mmap length 4194304 + perf event ring buffer mmapped per cpu + Synthesizing auxtrace information + Linux + [ perf record: Woken up 1 times to write data ] + [ perf record: Captured and wrote 0.042 MB perf.data ] + +Note, the sched_switch event is only added if the user is permitted to use it +and only in per-cpu mode. + +Note also, the sched_switch event is only added if TSC packets are requested. +That is because, in the absence of timing information, the sched_switch events +cannot be matched against the Intel PT trace. + + +perf script +=========== + +By default, perf script will decode trace data found in the perf.data file. +This can be further controlled by new option --itrace. + + +New --itrace option +------------------- + +Having no option is the same as + + --itrace + +which, in turn, is the same as + + --itrace=ibxe + +The letters are: + + i synthesize "instructions" events + b synthesize "branches" events + x synthesize "transactions" events + c synthesize branches events (calls only) + r synthesize branches events (returns only) + e synthesize tracing error events + d create a debug log + g synthesize a call chain (use with i or x) + +"Instructions" events look like they were recorded by "perf record -e +instructions". + +"Branches" events look like they were recorded by "perf record -e branches". "c" +and "r" can be combined to get calls and returns. + +"Transactions" events correspond to the start or end of transactions. The +'flags' field can be used in perf script to determine whether the event is a +tranasaction start, commit or abort. + +Error events are new. They show where the decoder lost the trace. Error events +are quite important. Users must know if what they are seeing is a complete +picture or not. + +The "d" option will cause the creation of a file "intel_pt.log" containing all +decoded packets and instructions. Note that this option slows down the decoder +and that the resulting file may be very large. + +In addition, the period of the "instructions" event can be specified. e.g. + + --itrace=i10us + +sets the period to 10us i.e. one instruction sample is synthesized for each 10 +microseconds of trace. Alternatives to "us" are "ms" (milliseconds), +"ns" (nanoseconds), "t" (TSC ticks) or "i" (instructions). + +"ms", "us" and "ns" are converted to TSC ticks. + +The timing information included with Intel PT does not give the time of every +instruction. Consequently, for the purpose of sampling, the decoder estimates +the time since the last timing packet based on 1 tick per instruction. The time +on the sample is *not* adjusted and reflects the last known value of TSC. + +For Intel PT, the default period is 100us. + +Also the call chain size (default 16, max. 1024) for instructions or +transactions events can be specified. e.g. + + --itrace=ig32 + --itrace=xg32 + +To disable trace decoding entirely, use the option --no-itrace. + + +dump option +----------- + +perf script has an option (-D) to "dump" the events i.e. display the binary +data. + +When -D is used, Intel PT packets are displayed. The packet decoder does not +pay attention to PSB packets, but just decodes the bytes - so the packets seen +by the actual decoder may not be identical in places where the data is corrupt. +One example of that would be when the buffer-switching interrupt has been too +slow, and the buffer has been filled completely. In that case, the last packet +in the buffer might be truncated and immediately followed by a PSB as the trace +continues in the next buffer. + +To disable the display of Intel PT packets, combine the -D option with +--no-itrace. + + +perf report +=========== + +By default, perf report will decode trace data found in the perf.data file. +This can be further controlled by new option --itrace exactly the same as +perf script, with the exception that the default is --itrace=igxe. + + +perf inject +=========== + +perf inject also accepts the --itrace option in which case tracing data is +removed and replaced with the synthesized events. e.g. + + perf inject --itrace -i perf.data -o perf.data.new diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 4b58daeff881..d9863cb96f59 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -76,6 +76,12 @@ include config/utilities.mak # # Define NO_AUXTRACE if you do not want AUX area tracing support +# As per kernel Makefile, avoid funny character set dependencies +unexport LC_ALL +LC_COLLATE=C +LC_NUMERIC=C +export LC_COLLATE LC_NUMERIC + ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(shell pwd))) srctree := $(patsubst %/,%,$(dir $(srctree))) @@ -135,6 +141,7 @@ INSTALL = install FLEX = flex BISON = bison STRIP = strip +AWK = awk LIB_DIR = $(srctree)/tools/lib/api/ TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/ @@ -289,7 +296,7 @@ strip: $(PROGRAMS) $(OUTPUT)perf PERF_IN := $(OUTPUT)perf-in.o -export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX +export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX AWK build := -f $(srctree)/tools/build/Makefile.build dir=. obj $(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE @@ -565,7 +572,8 @@ clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean config-clean $(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)$(RM) $(OUTPUT).config-detected $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 - $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* + $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \ + $(OUTPUT)util/intel-pt-decoder/inat-tables.c $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean $(python-clean) diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index cfbccc4e3187..a8be9f9d0462 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build @@ -1,8 +1,12 @@ libperf-y += header.o libperf-y += tsc.o +libperf-y += pmu.o libperf-y += kvm-stat.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o + +libperf-$(CONFIG_AUXTRACE) += auxtrace.o +libperf-$(CONFIG_AUXTRACE) += intel-pt.o diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c new file mode 100644 index 000000000000..e7654b506312 --- /dev/null +++ b/tools/perf/arch/x86/util/auxtrace.c @@ -0,0 +1,38 @@ +/* + * auxtrace.c: AUX area tracing support + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include "../../util/header.h" +#include "../../util/auxtrace.h" +#include "../../util/intel-pt.h" + +struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist __maybe_unused, + int *err) +{ + char buffer[64]; + int ret; + + *err = 0; + + ret = get_cpuid(buffer, sizeof(buffer)); + if (ret) { + *err = ret; + return NULL; + } + + if (!strncmp(buffer, "GenuineIntel,", 13)) + return intel_pt_recording_init(err); + + return NULL; +} diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c new file mode 100644 index 000000000000..da7d2c15e611 --- /dev/null +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -0,0 +1,752 @@ +/* + * intel_pt.c: Intel Processor Trace support + * Copyright (c) 2013-2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include <stdbool.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/bitops.h> +#include <linux/log2.h> + +#include "../../perf.h" +#include "../../util/session.h" +#include "../../util/event.h" +#include "../../util/evlist.h" +#include "../../util/evsel.h" +#include "../../util/cpumap.h" +#include "../../util/parse-options.h" +#include "../../util/parse-events.h" +#include "../../util/pmu.h" +#include "../../util/debug.h" +#include "../../util/auxtrace.h" +#include "../../util/tsc.h" +#include "../../util/intel-pt.h" + +#define KiB(x) ((x) * 1024) +#define MiB(x) ((x) * 1024 * 1024) +#define KiB_MASK(x) (KiB(x) - 1) +#define MiB_MASK(x) (MiB(x) - 1) + +#define INTEL_PT_DEFAULT_SAMPLE_SIZE KiB(4) + +#define INTEL_PT_MAX_SAMPLE_SIZE KiB(60) + +#define INTEL_PT_PSB_PERIOD_NEAR 256 + +struct intel_pt_snapshot_ref { + void *ref_buf; + size_t ref_offset; + bool wrapped; +}; + +struct intel_pt_recording { + struct auxtrace_record itr; + struct perf_pmu *intel_pt_pmu; + int have_sched_switch; + struct perf_evlist *evlist; + bool snapshot_mode; + bool snapshot_init_done; + size_t snapshot_size; + size_t snapshot_ref_buf_size; + int snapshot_ref_cnt; + struct intel_pt_snapshot_ref *snapshot_refs; +}; + +static int intel_pt_parse_terms_with_default(struct list_head *formats, + const char *str, + u64 *config) +{ + struct list_head *terms; + struct perf_event_attr attr = { .size = 0, }; + int err; + + terms = malloc(sizeof(struct list_head)); + if (!terms) + return -ENOMEM; + + INIT_LIST_HEAD(terms); + + err = parse_events_terms(terms, str); + if (err) + goto out_free; + + attr.config = *config; + err = perf_pmu__config_terms(formats, &attr, terms, true, NULL); + if (err) + goto out_free; + + *config = attr.config; +out_free: + parse_events__free_terms(terms); + return err; +} + +static int intel_pt_parse_terms(struct list_head *formats, const char *str, + u64 *config) +{ + *config = 0; + return intel_pt_parse_terms_with_default(formats, str, config); +} + +static size_t intel_pt_psb_period(struct perf_pmu *intel_pt_pmu __maybe_unused, + struct perf_evlist *evlist __maybe_unused) +{ + return 256; +} + +static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu) +{ + u64 config; + + intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &config); + return config; +} + +static int intel_pt_parse_snapshot_options(struct auxtrace_record *itr, + struct record_opts *opts, + const char *str) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + unsigned long long snapshot_size = 0; + char *endptr; + + if (str) { + snapshot_size = strtoull(str, &endptr, 0); + if (*endptr || snapshot_size > SIZE_MAX) + return -1; + } + + opts->auxtrace_snapshot_mode = true; + opts->auxtrace_snapshot_size = snapshot_size; + + ptr->snapshot_size = snapshot_size; + + return 0; +} + +struct perf_event_attr * +intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu) +{ + struct perf_event_attr *attr; + + attr = zalloc(sizeof(struct perf_event_attr)); + if (!attr) + return NULL; + + attr->config = intel_pt_default_config(intel_pt_pmu); + + intel_pt_pmu->selectable = true; + + return attr; +} + +static size_t intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused) +{ + return INTEL_PT_AUXTRACE_PRIV_SIZE; +} + +static int intel_pt_info_fill(struct auxtrace_record *itr, + struct perf_session *session, + struct auxtrace_info_event *auxtrace_info, + size_t priv_size) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu; + struct perf_event_mmap_page *pc; + struct perf_tsc_conversion tc = { .time_mult = 0, }; + bool cap_user_time_zero = false, per_cpu_mmaps; + u64 tsc_bit, noretcomp_bit; + int err; + + if (priv_size != INTEL_PT_AUXTRACE_PRIV_SIZE) + return -EINVAL; + + intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit); + intel_pt_parse_terms(&intel_pt_pmu->format, "noretcomp", + &noretcomp_bit); + + if (!session->evlist->nr_mmaps) + return -EINVAL; + + pc = session->evlist->mmap[0].base; + if (pc) { + err = perf_read_tsc_conversion(pc, &tc); + if (err) { + if (err != -EOPNOTSUPP) + return err; + } else { + cap_user_time_zero = tc.time_mult != 0; + } + if (!cap_user_time_zero) + ui__warning("Intel Processor Trace: TSC not available\n"); + } + + per_cpu_mmaps = !cpu_map__empty(session->evlist->cpus); + + auxtrace_info->type = PERF_AUXTRACE_INTEL_PT; + auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type; + auxtrace_info->priv[INTEL_PT_TIME_SHIFT] = tc.time_shift; + auxtrace_info->priv[INTEL_PT_TIME_MULT] = tc.time_mult; + auxtrace_info->priv[INTEL_PT_TIME_ZERO] = tc.time_zero; + auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO] = cap_user_time_zero; + auxtrace_info->priv[INTEL_PT_TSC_BIT] = tsc_bit; + auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT] = noretcomp_bit; + auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH] = ptr->have_sched_switch; + auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE] = ptr->snapshot_mode; + auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS] = per_cpu_mmaps; + + return 0; +} + +static int intel_pt_track_switches(struct perf_evlist *evlist) +{ + const char *sched_switch = "sched:sched_switch"; + struct perf_evsel *evsel; + int err; + + if (!perf_evlist__can_select_event(evlist, sched_switch)) + return -EPERM; + + err = parse_events(evlist, sched_switch, NULL); + if (err) { + pr_debug2("%s: failed to parse %s, error %d\n", + __func__, sched_switch, err); + return err; + } + + evsel = perf_evlist__last(evlist); + + perf_evsel__set_sample_bit(evsel, CPU); + perf_evsel__set_sample_bit(evsel, TIME); + + evsel->system_wide = true; + evsel->no_aux_samples = true; + evsel->immediate = true; + + return 0; +} + +static int intel_pt_recording_options(struct auxtrace_record *itr, + struct perf_evlist *evlist, + struct record_opts *opts) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu; + bool have_timing_info; + struct perf_evsel *evsel, *intel_pt_evsel = NULL; + const struct cpu_map *cpus = evlist->cpus; + bool privileged = geteuid() == 0 || perf_event_paranoid() < 0; + u64 tsc_bit; + + ptr->evlist = evlist; + ptr->snapshot_mode = opts->auxtrace_snapshot_mode; + + evlist__for_each(evlist, evsel) { + if (evsel->attr.type == intel_pt_pmu->type) { + if (intel_pt_evsel) { + pr_err("There may be only one " INTEL_PT_PMU_NAME " event\n"); + return -EINVAL; + } + evsel->attr.freq = 0; + evsel->attr.sample_period = 1; + intel_pt_evsel = evsel; + opts->full_auxtrace = true; + } + } + + if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) { + pr_err("Snapshot mode (-S option) requires " INTEL_PT_PMU_NAME " PMU event (-e " INTEL_PT_PMU_NAME ")\n"); + return -EINVAL; + } + + if (opts->use_clockid) { + pr_err("Cannot use clockid (-k option) with " INTEL_PT_PMU_NAME "\n"); + return -EINVAL; + } + + if (!opts->full_auxtrace) + return 0; + + /* Set default sizes for snapshot mode */ + if (opts->auxtrace_snapshot_mode) { + size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist); + + if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) { + if (privileged) { + opts->auxtrace_mmap_pages = MiB(4) / page_size; + } else { + opts->auxtrace_mmap_pages = KiB(128) / page_size; + if (opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + } + } else if (!opts->auxtrace_mmap_pages && !privileged && + opts->mmap_pages == UINT_MAX) { + opts->mmap_pages = KiB(256) / page_size; + } + if (!opts->auxtrace_snapshot_size) + opts->auxtrace_snapshot_size = + opts->auxtrace_mmap_pages * (size_t)page_size; + if (!opts->auxtrace_mmap_pages) { + size_t sz = opts->auxtrace_snapshot_size; + + sz = round_up(sz, page_size) / page_size; + opts->auxtrace_mmap_pages = roundup_pow_of_two(sz); + } + if (opts->auxtrace_snapshot_size > + opts->auxtrace_mmap_pages * (size_t)page_size) { + pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n", + opts->auxtrace_snapshot_size, + opts->auxtrace_mmap_pages * (size_t)page_size); + return -EINVAL; + } + if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) { + pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n"); + return -EINVAL; + } + pr_debug2("Intel PT snapshot size: %zu\n", + opts->auxtrace_snapshot_size); + if (psb_period && + opts->auxtrace_snapshot_size <= psb_period + + INTEL_PT_PSB_PERIOD_NEAR) + ui__warning("Intel PT snapshot size (%zu) may be too small for PSB period (%zu)\n", + opts->auxtrace_snapshot_size, psb_period); + } + + /* Set default sizes for full trace mode */ + if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) { + if (privileged) { + opts->auxtrace_mmap_pages = MiB(4) / page_size; + } else { + opts->auxtrace_mmap_pages = KiB(128) / page_size; + if (opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + } + } + + /* Validate auxtrace_mmap_pages */ + if (opts->auxtrace_mmap_pages) { + size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size; + size_t min_sz; + + if (opts->auxtrace_snapshot_mode) + min_sz = KiB(4); + else + min_sz = KiB(8); + + if (sz < min_sz || !is_power_of_2(sz)) { + pr_err("Invalid mmap size for Intel Processor Trace: must be at least %zuKiB and a power of 2\n", + min_sz / 1024); + return -EINVAL; + } + } + + intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit); + + if (opts->full_auxtrace && (intel_pt_evsel->attr.config & tsc_bit)) + have_timing_info = true; + else + have_timing_info = false; + + /* + * Per-cpu recording needs sched_switch events to distinguish different + * threads. + */ + if (have_timing_info && !cpu_map__empty(cpus)) { + int err; + + err = intel_pt_track_switches(evlist); + if (err == -EPERM) + pr_debug2("Unable to select sched:sched_switch\n"); + else if (err) + return err; + else + ptr->have_sched_switch = 1; + } + + if (intel_pt_evsel) { + /* + * To obtain the auxtrace buffer file descriptor, the auxtrace + * event must come first. + */ + perf_evlist__to_front(evlist, intel_pt_evsel); + /* + * In the case of per-cpu mmaps, we need the CPU on the + * AUX event. + */ + if (!cpu_map__empty(cpus)) + perf_evsel__set_sample_bit(intel_pt_evsel, CPU); + } + + /* Add dummy event to keep tracking */ + if (opts->full_auxtrace) { + struct perf_evsel *tracking_evsel; + int err; + + err = parse_events(evlist, "dummy:u", NULL); + if (err) + return err; + + tracking_evsel = perf_evlist__last(evlist); + + perf_evlist__set_tracking_event(evlist, tracking_evsel); + + tracking_evsel->attr.freq = 0; + tracking_evsel->attr.sample_period = 1; + + /* In per-cpu case, always need the time of mmap events etc */ + if (!cpu_map__empty(cpus)) + perf_evsel__set_sample_bit(tracking_evsel, TIME); + } + + /* + * Warn the user when we do not have enough information to decode i.e. + * per-cpu with no sched_switch (except workload-only). + */ + if (!ptr->have_sched_switch && !cpu_map__empty(cpus) && + !target__none(&opts->target)) + ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n"); + + return 0; +} + +static int intel_pt_snapshot_start(struct auxtrace_record *itr) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + struct perf_evsel *evsel; + + evlist__for_each(ptr->evlist, evsel) { + if (evsel->attr.type == ptr->intel_pt_pmu->type) + return perf_evlist__disable_event(ptr->evlist, evsel); + } + return -EINVAL; +} + +static int intel_pt_snapshot_finish(struct auxtrace_record *itr) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + struct perf_evsel *evsel; + + evlist__for_each(ptr->evlist, evsel) { + if (evsel->attr.type == ptr->intel_pt_pmu->type) + return perf_evlist__enable_event(ptr->evlist, evsel); + } + return -EINVAL; +} + +static int intel_pt_alloc_snapshot_refs(struct intel_pt_recording *ptr, int idx) +{ + const size_t sz = sizeof(struct intel_pt_snapshot_ref); + int cnt = ptr->snapshot_ref_cnt, new_cnt = cnt * 2; + struct intel_pt_snapshot_ref *refs; + + if (!new_cnt) + new_cnt = 16; + + while (new_cnt <= idx) + new_cnt *= 2; + + refs = calloc(new_cnt, sz); + if (!refs) + return -ENOMEM; + + memcpy(refs, ptr->snapshot_refs, cnt * sz); + + ptr->snapshot_refs = refs; + ptr->snapshot_ref_cnt = new_cnt; + + return 0; +} + +static void intel_pt_free_snapshot_refs(struct intel_pt_recording *ptr) +{ + int i; + + for (i = 0; i < ptr->snapshot_ref_cnt; i++) + zfree(&ptr->snapshot_refs[i].ref_buf); + zfree(&ptr->snapshot_refs); +} + +static void intel_pt_recording_free(struct auxtrace_record *itr) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + + intel_pt_free_snapshot_refs(ptr); + free(ptr); +} + +static int intel_pt_alloc_snapshot_ref(struct intel_pt_recording *ptr, int idx, + size_t snapshot_buf_size) +{ + size_t ref_buf_size = ptr->snapshot_ref_buf_size; + void *ref_buf; + + ref_buf = zalloc(ref_buf_size); + if (!ref_buf) + return -ENOMEM; + + ptr->snapshot_refs[idx].ref_buf = ref_buf; + ptr->snapshot_refs[idx].ref_offset = snapshot_buf_size - ref_buf_size; + + return 0; +} + +static size_t intel_pt_snapshot_ref_buf_size(struct intel_pt_recording *ptr, + size_t snapshot_buf_size) +{ + const size_t max_size = 256 * 1024; + size_t buf_size = 0, psb_period; + + if (ptr->snapshot_size <= 64 * 1024) + return 0; + + psb_period = intel_pt_psb_period(ptr->intel_pt_pmu, ptr->evlist); + if (psb_period) + buf_size = psb_period * 2; + + if (!buf_size || buf_size > max_size) + buf_size = max_size; + + if (buf_size >= snapshot_buf_size) + return 0; + + if (buf_size >= ptr->snapshot_size / 2) + return 0; + + return buf_size; +} + +static int intel_pt_snapshot_init(struct intel_pt_recording *ptr, + size_t snapshot_buf_size) +{ + if (ptr->snapshot_init_done) + return 0; + + ptr->snapshot_init_done = true; + + ptr->snapshot_ref_buf_size = intel_pt_snapshot_ref_buf_size(ptr, + snapshot_buf_size); + + return 0; +} + +/** + * intel_pt_compare_buffers - compare bytes in a buffer to a circular buffer. + * @buf1: first buffer + * @compare_size: number of bytes to compare + * @buf2: second buffer (a circular buffer) + * @offs2: offset in second buffer + * @buf2_size: size of second buffer + * + * The comparison allows for the possibility that the bytes to compare in the + * circular buffer are not contiguous. It is assumed that @compare_size <= + * @buf2_size. This function returns %false if the bytes are identical, %true + * otherwise. + */ +static bool intel_pt_compare_buffers(void *buf1, size_t compare_size, + void *buf2, size_t offs2, size_t buf2_size) +{ + size_t end2 = offs2 + compare_size, part_size; + + if (end2 <= buf2_size) + return memcmp(buf1, buf2 + offs2, compare_size); + + part_size = end2 - buf2_size; + if (memcmp(buf1, buf2 + offs2, part_size)) + return true; + + compare_size -= part_size; + + return memcmp(buf1 + part_size, buf2, compare_size); +} + +static bool intel_pt_compare_ref(void *ref_buf, size_t ref_offset, + size_t ref_size, size_t buf_size, + void *data, size_t head) +{ + size_t ref_end = ref_offset + ref_size; + + if (ref_end > buf_size) { + if (head > ref_offset || head < ref_end - buf_size) + return true; + } else if (head > ref_offset && head < ref_end) { + return true; + } + + return intel_pt_compare_buffers(ref_buf, ref_size, data, ref_offset, + buf_size); +} + +static void intel_pt_copy_ref(void *ref_buf, size_t ref_size, size_t buf_size, + void *data, size_t head) +{ + if (head >= ref_size) { + memcpy(ref_buf, data + head - ref_size, ref_size); + } else { + memcpy(ref_buf, data, head); + ref_size -= head; + memcpy(ref_buf + head, data + buf_size - ref_size, ref_size); + } +} + +static bool intel_pt_wrapped(struct intel_pt_recording *ptr, int idx, + struct auxtrace_mmap *mm, unsigned char *data, + u64 head) +{ + struct intel_pt_snapshot_ref *ref = &ptr->snapshot_refs[idx]; + bool wrapped; + + wrapped = intel_pt_compare_ref(ref->ref_buf, ref->ref_offset, + ptr->snapshot_ref_buf_size, mm->len, + data, head); + + intel_pt_copy_ref(ref->ref_buf, ptr->snapshot_ref_buf_size, mm->len, + data, head); + + return wrapped; +} + +static bool intel_pt_first_wrap(u64 *data, size_t buf_size) +{ + int i, a, b; + + b = buf_size >> 3; + a = b - 512; + if (a < 0) + a = 0; + + for (i = a; i < b; i++) { + if (data[i]) + return true; + } + + return false; +} + +static int intel_pt_find_snapshot(struct auxtrace_record *itr, int idx, + struct auxtrace_mmap *mm, unsigned char *data, + u64 *head, u64 *old) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + bool wrapped; + int err; + + pr_debug3("%s: mmap index %d old head %zu new head %zu\n", + __func__, idx, (size_t)*old, (size_t)*head); + + err = intel_pt_snapshot_init(ptr, mm->len); + if (err) + goto out_err; + + if (idx >= ptr->snapshot_ref_cnt) { + err = intel_pt_alloc_snapshot_refs(ptr, idx); + if (err) + goto out_err; + } + + if (ptr->snapshot_ref_buf_size) { + if (!ptr->snapshot_refs[idx].ref_buf) { + err = intel_pt_alloc_snapshot_ref(ptr, idx, mm->len); + if (err) + goto out_err; + } + wrapped = intel_pt_wrapped(ptr, idx, mm, data, *head); + } else { + wrapped = ptr->snapshot_refs[idx].wrapped; + if (!wrapped && intel_pt_first_wrap((u64 *)data, mm->len)) { + ptr->snapshot_refs[idx].wrapped = true; + wrapped = true; + } + } + + /* + * In full trace mode 'head' continually increases. However in snapshot + * mode 'head' is an offset within the buffer. Here 'old' and 'head' + * are adjusted to match the full trace case which expects that 'old' is + * always less than 'head'. + */ + if (wrapped) { + *old = *head; + *head += mm->len; + } else { + if (mm->mask) + *old &= mm->mask; + else + *old %= mm->len; + if (*old > *head) + *head += mm->len; + } + + pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n", + __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head); + + return 0; + +out_err: + pr_err("%s: failed, error %d\n", __func__, err); + return err; +} + +static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused) +{ + return rdtsc(); +} + +static int intel_pt_read_finish(struct auxtrace_record *itr, int idx) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + struct perf_evsel *evsel; + + evlist__for_each(ptr->evlist, evsel) { + if (evsel->attr.type == ptr->intel_pt_pmu->type) + return perf_evlist__enable_event_idx(ptr->evlist, evsel, + idx); + } + return -EINVAL; +} + +struct auxtrace_record *intel_pt_recording_init(int *err) +{ + struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME); + struct intel_pt_recording *ptr; + + if (!intel_pt_pmu) + return NULL; + + ptr = zalloc(sizeof(struct intel_pt_recording)); + if (!ptr) { + *err = -ENOMEM; + return NULL; + } + + ptr->intel_pt_pmu = intel_pt_pmu; + ptr->itr.recording_options = intel_pt_recording_options; + ptr->itr.info_priv_size = intel_pt_info_priv_size; + ptr->itr.info_fill = intel_pt_info_fill; + ptr->itr.free = intel_pt_recording_free; + ptr->itr.snapshot_start = intel_pt_snapshot_start; + ptr->itr.snapshot_finish = intel_pt_snapshot_finish; + ptr->itr.find_snapshot = intel_pt_find_snapshot; + ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options; + ptr->itr.reference = intel_pt_reference; + ptr->itr.read_finish = intel_pt_read_finish; + return &ptr->itr; +} diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c new file mode 100644 index 000000000000..fd11cc3ce780 --- /dev/null +++ b/tools/perf/arch/x86/util/pmu.c @@ -0,0 +1,15 @@ +#include <string.h> + +#include <linux/perf_event.h> + +#include "../../util/intel-pt.h" +#include "../../util/pmu.h" + +struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) +{ +#ifdef HAVE_AUXTRACE_SUPPORT + if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) + return intel_pt_pmu_default_config(pmu); +#endif + return NULL; +} diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 7b376d215e94..105332e950a9 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1561,6 +1561,22 @@ static int have_cmd(int argc, const char **argv) return 0; } +static void script__setup_sample_type(struct perf_script *script) +{ + struct perf_session *session = script->session; + u64 sample_type = perf_evlist__combined_sample_type(session->evlist); + + if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) { + if ((sample_type & PERF_SAMPLE_REGS_USER) && + (sample_type & PERF_SAMPLE_STACK_USER)) + callchain_param.record_mode = CALLCHAIN_DWARF; + else if (sample_type & PERF_SAMPLE_BRANCH_STACK) + callchain_param.record_mode = CALLCHAIN_LBR; + else + callchain_param.record_mode = CALLCHAIN_FP; + } +} + int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) { bool show_full_info = false; @@ -1849,6 +1865,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) goto out_delete; script.session = session; + script__setup_sample_type(&script); session->itrace_synth_opts = &itrace_synth_opts; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 489cc118a36a..2f1162daa3c5 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1315,7 +1315,10 @@ struct thread_trace { double runtime_ms; struct { unsigned long ptr; - int entry_str_pos; + short int entry_str_pos; + bool pending_open; + unsigned int namelen; + char *name; } filename; struct { int max; @@ -1391,7 +1394,6 @@ struct trace { size_t nr; int *entries; } ev_qualifier_ids; - const char *last_vfs_getname; struct intlist *tid_list; struct intlist *pid_list; struct { @@ -1966,8 +1968,11 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output); fprintf(trace->output, "%-70s\n", ttrace->entry_str); } - } else + } else { ttrace->entry_pending = true; + /* See trace__vfs_getname & trace__sys_exit */ + ttrace->filename.pending_open = false; + } if (trace->current != thread) { thread__put(trace->current); @@ -2003,9 +2008,9 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, ret = perf_evsel__sc_tp_uint(evsel, ret, sample); - if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) { - trace__set_fd_pathname(thread, ret, trace->last_vfs_getname); - trace->last_vfs_getname = NULL; + if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) { + trace__set_fd_pathname(thread, ret, ttrace->filename.name); + ttrace->filename.pending_open = false; ++trace->stats.vfs_getname; } @@ -2065,9 +2070,7 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, size_t filename_len, entry_str_len, to_move; ssize_t remaining_space; char *pos; - const char *filename; - - trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname"); + const char *filename = perf_evsel__rawptr(evsel, sample, "pathname"); if (!thread) goto out; @@ -2076,6 +2079,21 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, if (!ttrace) goto out; + filename_len = strlen(filename); + + if (ttrace->filename.namelen < filename_len) { + char *f = realloc(ttrace->filename.name, filename_len + 1); + + if (f == NULL) + goto out; + + ttrace->filename.namelen = filename_len; + ttrace->filename.name = f; + } + + strcpy(ttrace->filename.name, filename); + ttrace->filename.pending_open = true; + if (!ttrace->filename.ptr) goto out; @@ -2084,8 +2102,6 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, if (remaining_space <= 0) goto out; - filename = trace->last_vfs_getname; - filename_len = strlen(filename); if (filename_len > (size_t)remaining_space) { filename += filename_len - remaining_space; filename_len = remaining_space; diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 823195aa6d4b..827557fc7511 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -297,7 +297,11 @@ ifndef NO_LIBELF else CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS) LDFLAGS += $(LIBDW_LDFLAGS) - EXTLIBS += -ldw + DWARFLIBS := -ldw + ifeq ($(findstring -static,${LDFLAGS}),-static) + DWARFLIBS += -lelf -lebl -lz -llzma -lbz2 + endif + EXTLIBS += ${DWARFLIBS} $(call detected,CONFIG_DWARF) endif # PERF_HAVE_DWARF_REGS endif # NO_DWARF diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 1ce0adc8b3cb..c20473d1369e 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -78,6 +78,8 @@ libperf-$(CONFIG_X86) += tsc.o libperf-y += cloexec.o libperf-y += thread-stack.o libperf-$(CONFIG_AUXTRACE) += auxtrace.o +libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ +libperf-$(CONFIG_AUXTRACE) += intel-pt.o libperf-y += parse-branch-options.o libperf-$(CONFIG_LIBELF) += symbol-elf.o diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index e0b614648044..8a18347709e1 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -621,7 +621,7 @@ int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, struct addr_map_symbol *start, unsigned cycles) { - unsigned long saddr = 0; + u64 saddr = 0; int err; if (!cycles) @@ -640,7 +640,7 @@ int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, start->addr == ams->sym->start + ams->map->start))) saddr = start->al_addr; if (saddr == 0) - pr_debug2("BB with bad start: addr %lx start %lx sym %lx saddr %lx\n", + pr_debug2("BB with bad start: addr %"PRIx64" start %"PRIx64" sym %"PRIx64" saddr %"PRIx64"\n", ams->addr, start ? start->addr : 0, ams->sym ? ams->sym->start + ams->map->start : 0, diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index a25b3609cef8..0f0b7e11e2d9 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -47,6 +47,8 @@ #include "debug.h" #include "parse-options.h" +#include "intel-pt.h" + int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, struct auxtrace_mmap_params *mp, void *userpg, int fd) @@ -876,7 +878,7 @@ static bool auxtrace__dont_decode(struct perf_session *session) int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct perf_session *session __maybe_unused) + struct perf_session *session) { enum auxtrace_type type = event->auxtrace_info.type; @@ -884,6 +886,8 @@ int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused, fprintf(stdout, " type: %u\n", type); switch (type) { + case PERF_AUXTRACE_INTEL_PT: + return intel_pt_process_auxtrace_info(event, session); case PERF_AUXTRACE_UNKNOWN: default: return -EINVAL; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 471aecbc4d68..7d12f33a3a06 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -39,6 +39,7 @@ struct events_stats; enum auxtrace_type { PERF_AUXTRACE_UNKNOWN, + PERF_AUXTRACE_INTEL_PT, }; enum itrace_period_type { diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 445f455dd377..a509aa8433a1 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -770,7 +770,7 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) continue; } /* Filter lines based on address */ - if (rt_die != cu_die) + if (rt_die != cu_die) { /* * Address filtering * The line is included in given function, and @@ -784,6 +784,7 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) decf != dwarf_decl_file(&die_mem)) continue; } + } /* Get source line */ fname = dwarf_linesrc(line, NULL, NULL); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 397757063da1..436e358300b1 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -115,6 +115,7 @@ void perf_evlist__close(struct perf_evlist *evlist); void perf_evlist__set_id_pos(struct perf_evlist *evlist); bool perf_can_sample_identifier(void); bool perf_can_record_switch_events(void); +bool perf_can_record_cpu_wide(void); void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts); int record_opts__config(struct record_opts *opts); diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build new file mode 100644 index 000000000000..240730d682c1 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/Build @@ -0,0 +1,11 @@ +libperf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o + +inat_tables_script = util/intel-pt-decoder/gen-insn-attr-x86.awk +inat_tables_maps = util/intel-pt-decoder/x86-opcode-map.txt + +$(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_tables_maps) + @$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@ + +$(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c + +CFLAGS_intel-pt-insn-decoder.o += -I$(OUTPUT)util/intel-pt-decoder -Wno-override-init diff --git a/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk b/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk new file mode 100644 index 000000000000..517567347aac --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk @@ -0,0 +1,386 @@ +#!/bin/awk -f +# gen-insn-attr-x86.awk: Instruction attribute table generator +# Written by Masami Hiramatsu <mhiramat@redhat.com> +# +# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c + +# Awk implementation sanity check +function check_awk_implement() { + if (sprintf("%x", 0) != "0") + return "Your awk has a printf-format problem." + return "" +} + +# Clear working vars +function clear_vars() { + delete table + delete lptable2 + delete lptable1 + delete lptable3 + eid = -1 # escape id + gid = -1 # group id + aid = -1 # AVX id + tname = "" +} + +BEGIN { + # Implementation error checking + awkchecked = check_awk_implement() + if (awkchecked != "") { + print "Error: " awkchecked > "/dev/stderr" + print "Please try to use gawk." > "/dev/stderr" + exit 1 + } + + # Setup generating tables + print "/* x86 opcode map generated from x86-opcode-map.txt */" + print "/* Do not change this code. */\n" + ggid = 1 + geid = 1 + gaid = 0 + delete etable + delete gtable + delete atable + + opnd_expr = "^[A-Za-z/]" + ext_expr = "^\\(" + sep_expr = "^\\|$" + group_expr = "^Grp[0-9A-Za-z]+" + + imm_expr = "^[IJAOL][a-z]" + imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" + imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" + imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" + imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)" + imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)" + imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)" + imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" + imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" + imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)" + imm_flag["Ob"] = "INAT_MOFFSET" + imm_flag["Ov"] = "INAT_MOFFSET" + imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" + + modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])" + force64_expr = "\\([df]64\\)" + rex_expr = "^REX(\\.[XRWB]+)*" + fpu_expr = "^ESC" # TODO + + lprefix1_expr = "\\((66|!F3)\\)" + lprefix2_expr = "\\(F3\\)" + lprefix3_expr = "\\((F2|!F3|66\\&F2)\\)" + lprefix_expr = "\\((66|F2|F3)\\)" + max_lprefix = 4 + + # All opcodes starting with lower-case 'v' or with (v1) superscript + # accepts VEX prefix + vexok_opcode_expr = "^v.*" + vexok_expr = "\\(v1\\)" + # All opcodes with (v) superscript supports *only* VEX prefix + vexonly_expr = "\\(v\\)" + + prefix_expr = "\\(Prefix\\)" + prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" + prefix_num["REPNE"] = "INAT_PFX_REPNE" + prefix_num["REP/REPE"] = "INAT_PFX_REPE" + prefix_num["XACQUIRE"] = "INAT_PFX_REPNE" + prefix_num["XRELEASE"] = "INAT_PFX_REPE" + prefix_num["LOCK"] = "INAT_PFX_LOCK" + prefix_num["SEG=CS"] = "INAT_PFX_CS" + prefix_num["SEG=DS"] = "INAT_PFX_DS" + prefix_num["SEG=ES"] = "INAT_PFX_ES" + prefix_num["SEG=FS"] = "INAT_PFX_FS" + prefix_num["SEG=GS"] = "INAT_PFX_GS" + prefix_num["SEG=SS"] = "INAT_PFX_SS" + prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" + prefix_num["VEX+1byte"] = "INAT_PFX_VEX2" + prefix_num["VEX+2byte"] = "INAT_PFX_VEX3" + + clear_vars() +} + +function semantic_error(msg) { + print "Semantic error at " NR ": " msg > "/dev/stderr" + exit 1 +} + +function debug(msg) { + print "DEBUG: " msg +} + +function array_size(arr, i,c) { + c = 0 + for (i in arr) + c++ + return c +} + +/^Table:/ { + print "/* " $0 " */" + if (tname != "") + semantic_error("Hit Table: before EndTable:."); +} + +/^Referrer:/ { + if (NF != 1) { + # escape opcode table + ref = "" + for (i = 2; i <= NF; i++) + ref = ref $i + eid = escape[ref] + tname = sprintf("inat_escape_table_%d", eid) + } +} + +/^AVXcode:/ { + if (NF != 1) { + # AVX/escape opcode table + aid = $2 + if (gaid <= aid) + gaid = aid + 1 + if (tname == "") # AVX only opcode table + tname = sprintf("inat_avx_table_%d", $2) + } + if (aid == -1 && eid == -1) # primary opcode table + tname = "inat_primary_table" +} + +/^GrpTable:/ { + print "/* " $0 " */" + if (!($2 in group)) + semantic_error("No group: " $2 ) + gid = group[$2] + tname = "inat_group_table_" gid +} + +function print_table(tbl,name,fmt,n) +{ + print "const insn_attr_t " name " = {" + for (i = 0; i < n; i++) { + id = sprintf(fmt, i) + if (tbl[id]) + print " [" id "] = " tbl[id] "," + } + print "};" +} + +/^EndTable/ { + if (gid != -1) { + # print group tables + if (array_size(table) != 0) { + print_table(table, tname "[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,0] = tname + } + if (array_size(lptable1) != 0) { + print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,1] = tname "_1" + } + if (array_size(lptable2) != 0) { + print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,2] = tname "_2" + } + if (array_size(lptable3) != 0) { + print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,3] = tname "_3" + } + } else { + # print primary/escaped tables + if (array_size(table) != 0) { + print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,0] = tname + if (aid >= 0) + atable[aid,0] = tname + } + if (array_size(lptable1) != 0) { + print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,1] = tname "_1" + if (aid >= 0) + atable[aid,1] = tname "_1" + } + if (array_size(lptable2) != 0) { + print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,2] = tname "_2" + if (aid >= 0) + atable[aid,2] = tname "_2" + } + if (array_size(lptable3) != 0) { + print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,3] = tname "_3" + if (aid >= 0) + atable[aid,3] = tname "_3" + } + } + print "" + clear_vars() +} + +function add_flags(old,new) { + if (old && new) + return old " | " new + else if (old) + return old + else + return new +} + +# convert operands to flags. +function convert_operands(count,opnd, i,j,imm,mod) +{ + imm = null + mod = null + for (j = 1; j <= count; j++) { + i = opnd[j] + if (match(i, imm_expr) == 1) { + if (!imm_flag[i]) + semantic_error("Unknown imm opnd: " i) + if (imm) { + if (i != "Ib") + semantic_error("Second IMM error") + imm = add_flags(imm, "INAT_SCNDIMM") + } else + imm = imm_flag[i] + } else if (match(i, modrm_expr)) + mod = "INAT_MODRM" + } + return add_flags(imm, mod) +} + +/^[0-9a-f]+\:/ { + if (NR == 1) + next + # get index + idx = "0x" substr($1, 1, index($1,":") - 1) + if (idx in table) + semantic_error("Redefine " idx " in " tname) + + # check if escaped opcode + if ("escape" == $2) { + if ($3 != "#") + semantic_error("No escaped name") + ref = "" + for (i = 4; i <= NF; i++) + ref = ref $i + if (ref in escape) + semantic_error("Redefine escape (" ref ")") + escape[ref] = geid + geid++ + table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")" + next + } + + variant = null + # converts + i = 2 + while (i <= NF) { + opcode = $(i++) + delete opnds + ext = null + flags = null + opnd = null + # parse one opcode + if (match($i, opnd_expr)) { + opnd = $i + count = split($(i++), opnds, ",") + flags = convert_operands(count, opnds) + } + if (match($i, ext_expr)) + ext = $(i++) + if (match($i, sep_expr)) + i++ + else if (i < NF) + semantic_error($i " is not a separator") + + # check if group opcode + if (match(opcode, group_expr)) { + if (!(opcode in group)) { + group[opcode] = ggid + ggid++ + } + flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")") + } + # check force(or default) 64bit + if (match(ext, force64_expr)) + flags = add_flags(flags, "INAT_FORCE64") + + # check REX prefix + if (match(opcode, rex_expr)) + flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)") + + # check coprocessor escape : TODO + if (match(opcode, fpu_expr)) + flags = add_flags(flags, "INAT_MODRM") + + # check VEX codes + if (match(ext, vexonly_expr)) + flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") + else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr)) + flags = add_flags(flags, "INAT_VEXOK") + + # check prefixes + if (match(ext, prefix_expr)) { + if (!prefix_num[opcode]) + semantic_error("Unknown prefix: " opcode) + flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")") + } + if (length(flags) == 0) + continue + # check if last prefix + if (match(ext, lprefix1_expr)) { + lptable1[idx] = add_flags(lptable1[idx],flags) + variant = "INAT_VARIANT" + } + if (match(ext, lprefix2_expr)) { + lptable2[idx] = add_flags(lptable2[idx],flags) + variant = "INAT_VARIANT" + } + if (match(ext, lprefix3_expr)) { + lptable3[idx] = add_flags(lptable3[idx],flags) + variant = "INAT_VARIANT" + } + if (!match(ext, lprefix_expr)){ + table[idx] = add_flags(table[idx],flags) + } + } + if (variant) + table[idx] = add_flags(table[idx],variant) +} + +END { + if (awkchecked != "") + exit 1 + # print escape opcode map's array + print "/* Escape opcode map array */" + print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \ + "[INAT_LSTPFX_MAX + 1] = {" + for (i = 0; i < geid; i++) + for (j = 0; j < max_lprefix; j++) + if (etable[i,j]) + print " ["i"]["j"] = "etable[i,j]"," + print "};\n" + # print group opcode map's array + print "/* Group opcode map array */" + print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]"\ + "[INAT_LSTPFX_MAX + 1] = {" + for (i = 0; i < ggid; i++) + for (j = 0; j < max_lprefix; j++) + if (gtable[i,j]) + print " ["i"]["j"] = "gtable[i,j]"," + print "};\n" + # print AVX opcode map's array + print "/* AVX opcode map array */" + print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]"\ + "[INAT_LSTPFX_MAX + 1] = {" + for (i = 0; i < gaid; i++) + for (j = 0; j < max_lprefix; j++) + if (atable[i,j]) + print " ["i"]["j"] = "atable[i,j]"," + print "};" +} diff --git a/tools/perf/util/intel-pt-decoder/inat.c b/tools/perf/util/intel-pt-decoder/inat.c new file mode 100644 index 000000000000..feeaa509dfe4 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/inat.c @@ -0,0 +1,96 @@ +/* + * x86 instruction attribute tables + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ +#include <asm/insn.h> + +/* Attribute tables are generated from opcode map */ +#include "inat-tables.c" + +/* Attribute search APIs */ +insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) +{ + return inat_primary_table[opcode]; +} + +int inat_get_last_prefix_id(insn_byte_t last_pfx) +{ + insn_attr_t lpfx_attr; + + lpfx_attr = inat_get_opcode_attribute(last_pfx); + return inat_last_prefix_id(lpfx_attr); +} + +insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id, + insn_attr_t esc_attr) +{ + const insn_attr_t *table; + int n; + + n = inat_escape_id(esc_attr); + + table = inat_escape_tables[n][0]; + if (!table) + return 0; + if (inat_has_variant(table[opcode]) && lpfx_id) { + table = inat_escape_tables[n][lpfx_id]; + if (!table) + return 0; + } + return table[opcode]; +} + +insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id, + insn_attr_t grp_attr) +{ + const insn_attr_t *table; + int n; + + n = inat_group_id(grp_attr); + + table = inat_group_tables[n][0]; + if (!table) + return inat_group_common_attribute(grp_attr); + if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) { + table = inat_group_tables[n][lpfx_id]; + if (!table) + return inat_group_common_attribute(grp_attr); + } + return table[X86_MODRM_REG(modrm)] | + inat_group_common_attribute(grp_attr); +} + +insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, + insn_byte_t vex_p) +{ + const insn_attr_t *table; + if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) + return 0; + /* At first, this checks the master table */ + table = inat_avx_tables[vex_m][0]; + if (!table) + return 0; + if (!inat_is_group(table[opcode]) && vex_p) { + /* If this is not a group, get attribute directly */ + table = inat_avx_tables[vex_m][vex_p]; + if (!table) + return 0; + } + return table[opcode]; +} diff --git a/tools/perf/util/intel-pt-decoder/inat.h b/tools/perf/util/intel-pt-decoder/inat.h new file mode 100644 index 000000000000..74a2e312e8a2 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/inat.h @@ -0,0 +1,221 @@ +#ifndef _ASM_X86_INAT_H +#define _ASM_X86_INAT_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ +#include <asm/inat_types.h> + +/* + * Internal bits. Don't use bitmasks directly, because these bits are + * unstable. You should use checking functions. + */ + +#define INAT_OPCODE_TABLE_SIZE 256 +#define INAT_GROUP_TABLE_SIZE 8 + +/* Legacy last prefixes */ +#define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ +#define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */ +#define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */ +/* Other Legacy prefixes */ +#define INAT_PFX_LOCK 4 /* 0xF0 */ +#define INAT_PFX_CS 5 /* 0x2E */ +#define INAT_PFX_DS 6 /* 0x3E */ +#define INAT_PFX_ES 7 /* 0x26 */ +#define INAT_PFX_FS 8 /* 0x64 */ +#define INAT_PFX_GS 9 /* 0x65 */ +#define INAT_PFX_SS 10 /* 0x36 */ +#define INAT_PFX_ADDRSZ 11 /* 0x67 */ +/* x86-64 REX prefix */ +#define INAT_PFX_REX 12 /* 0x4X */ +/* AVX VEX prefixes */ +#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ +#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ + +#define INAT_LSTPFX_MAX 3 +#define INAT_LGCPFX_MAX 11 + +/* Immediate size */ +#define INAT_IMM_BYTE 1 +#define INAT_IMM_WORD 2 +#define INAT_IMM_DWORD 3 +#define INAT_IMM_QWORD 4 +#define INAT_IMM_PTR 5 +#define INAT_IMM_VWORD32 6 +#define INAT_IMM_VWORD 7 + +/* Legacy prefix */ +#define INAT_PFX_OFFS 0 +#define INAT_PFX_BITS 4 +#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1) +#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS) +/* Escape opcodes */ +#define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS) +#define INAT_ESC_BITS 2 +#define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1) +#define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS) +/* Group opcodes (1-16) */ +#define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS) +#define INAT_GRP_BITS 5 +#define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1) +#define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS) +/* Immediates */ +#define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS) +#define INAT_IMM_BITS 3 +#define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS) +/* Flags */ +#define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS) +#define INAT_MODRM (1 << (INAT_FLAG_OFFS)) +#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1)) +#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2)) +#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3)) +#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) +#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) +#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) +/* Attribute making macros for attribute tables */ +#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) +#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) +#define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM) +#define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS) + +/* Attribute search APIs */ +extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); +extern int inat_get_last_prefix_id(insn_byte_t last_pfx); +extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, + int lpfx_id, + insn_attr_t esc_attr); +extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, + int lpfx_id, + insn_attr_t esc_attr); +extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, + insn_byte_t vex_m, + insn_byte_t vex_pp); + +/* Attribute checking functions */ +static inline int inat_is_legacy_prefix(insn_attr_t attr) +{ + attr &= INAT_PFX_MASK; + return attr && attr <= INAT_LGCPFX_MAX; +} + +static inline int inat_is_address_size_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ; +} + +static inline int inat_is_operand_size_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ; +} + +static inline int inat_is_rex_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_REX; +} + +static inline int inat_last_prefix_id(insn_attr_t attr) +{ + if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX) + return 0; + else + return attr & INAT_PFX_MASK; +} + +static inline int inat_is_vex_prefix(insn_attr_t attr) +{ + attr &= INAT_PFX_MASK; + return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3; +} + +static inline int inat_is_vex3_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3; +} + +static inline int inat_is_escape(insn_attr_t attr) +{ + return attr & INAT_ESC_MASK; +} + +static inline int inat_escape_id(insn_attr_t attr) +{ + return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS; +} + +static inline int inat_is_group(insn_attr_t attr) +{ + return attr & INAT_GRP_MASK; +} + +static inline int inat_group_id(insn_attr_t attr) +{ + return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS; +} + +static inline int inat_group_common_attribute(insn_attr_t attr) +{ + return attr & ~INAT_GRP_MASK; +} + +static inline int inat_has_immediate(insn_attr_t attr) +{ + return attr & INAT_IMM_MASK; +} + +static inline int inat_immediate_size(insn_attr_t attr) +{ + return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS; +} + +static inline int inat_has_modrm(insn_attr_t attr) +{ + return attr & INAT_MODRM; +} + +static inline int inat_is_force64(insn_attr_t attr) +{ + return attr & INAT_FORCE64; +} + +static inline int inat_has_second_immediate(insn_attr_t attr) +{ + return attr & INAT_SCNDIMM; +} + +static inline int inat_has_moffset(insn_attr_t attr) +{ + return attr & INAT_MOFFSET; +} + +static inline int inat_has_variant(insn_attr_t attr) +{ + return attr & INAT_VARIANT; +} + +static inline int inat_accept_vex(insn_attr_t attr) +{ + return attr & INAT_VEXOK; +} + +static inline int inat_must_vex(insn_attr_t attr) +{ + return attr & INAT_VEXONLY; +} +#endif diff --git a/tools/perf/util/intel-pt-decoder/insn.c b/tools/perf/util/intel-pt-decoder/insn.c new file mode 100644 index 000000000000..8f72b334aea0 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/insn.c @@ -0,0 +1,594 @@ +/* + * x86 instruction analysis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2002, 2004, 2009 + */ + +#ifdef __KERNEL__ +#include <linux/string.h> +#else +#include <string.h> +#endif +#include <asm/inat.h> +#include <asm/insn.h> + +/* Verify next sizeof(t) bytes can be on the same instruction */ +#define validate_next(t, insn, n) \ + ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) + +#define __get_next(t, insn) \ + ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) + +#define __peek_nbyte_next(t, insn, n) \ + ({ t r = *(t*)((insn)->next_byte + n); r; }) + +#define get_next(t, insn) \ + ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); }) + +#define peek_nbyte_next(t, insn, n) \ + ({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); }) + +#define peek_next(t, insn) peek_nbyte_next(t, insn, 0) + +/** + * insn_init() - initialize struct insn + * @insn: &struct insn to be initialized + * @kaddr: address (in kernel memory) of instruction (or copy thereof) + * @x86_64: !0 for 64-bit kernel or 64-bit app + */ +void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64) +{ + /* + * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid + * even if the input buffer is long enough to hold them. + */ + if (buf_len > MAX_INSN_SIZE) + buf_len = MAX_INSN_SIZE; + + memset(insn, 0, sizeof(*insn)); + insn->kaddr = kaddr; + insn->end_kaddr = kaddr + buf_len; + insn->next_byte = kaddr; + insn->x86_64 = x86_64 ? 1 : 0; + insn->opnd_bytes = 4; + if (x86_64) + insn->addr_bytes = 8; + else + insn->addr_bytes = 4; +} + +/** + * insn_get_prefixes - scan x86 instruction prefix bytes + * @insn: &struct insn containing instruction + * + * Populates the @insn->prefixes bitmap, and updates @insn->next_byte + * to point to the (first) opcode. No effect if @insn->prefixes.got + * is already set. + */ +void insn_get_prefixes(struct insn *insn) +{ + struct insn_field *prefixes = &insn->prefixes; + insn_attr_t attr; + insn_byte_t b, lb; + int i, nb; + + if (prefixes->got) + return; + + nb = 0; + lb = 0; + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + while (inat_is_legacy_prefix(attr)) { + /* Skip if same prefix */ + for (i = 0; i < nb; i++) + if (prefixes->bytes[i] == b) + goto found; + if (nb == 4) + /* Invalid instruction */ + break; + prefixes->bytes[nb++] = b; + if (inat_is_address_size_prefix(attr)) { + /* address size switches 2/4 or 4/8 */ + if (insn->x86_64) + insn->addr_bytes ^= 12; + else + insn->addr_bytes ^= 6; + } else if (inat_is_operand_size_prefix(attr)) { + /* oprand size switches 2/4 */ + insn->opnd_bytes ^= 6; + } +found: + prefixes->nbytes++; + insn->next_byte++; + lb = b; + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + } + /* Set the last prefix */ + if (lb && lb != insn->prefixes.bytes[3]) { + if (unlikely(insn->prefixes.bytes[3])) { + /* Swap the last prefix */ + b = insn->prefixes.bytes[3]; + for (i = 0; i < nb; i++) + if (prefixes->bytes[i] == lb) + prefixes->bytes[i] = b; + } + insn->prefixes.bytes[3] = lb; + } + + /* Decode REX prefix */ + if (insn->x86_64) { + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + if (inat_is_rex_prefix(attr)) { + insn->rex_prefix.value = b; + insn->rex_prefix.nbytes = 1; + insn->next_byte++; + if (X86_REX_W(b)) + /* REX.W overrides opnd_size */ + insn->opnd_bytes = 8; + } + } + insn->rex_prefix.got = 1; + + /* Decode VEX prefix */ + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + if (inat_is_vex_prefix(attr)) { + insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1); + if (!insn->x86_64) { + /* + * In 32-bits mode, if the [7:6] bits (mod bits of + * ModRM) on the second byte are not 11b, it is + * LDS or LES. + */ + if (X86_MODRM_MOD(b2) != 3) + goto vex_end; + } + insn->vex_prefix.bytes[0] = b; + insn->vex_prefix.bytes[1] = b2; + if (inat_is_vex3_prefix(attr)) { + b2 = peek_nbyte_next(insn_byte_t, insn, 2); + insn->vex_prefix.bytes[2] = b2; + insn->vex_prefix.nbytes = 3; + insn->next_byte += 3; + if (insn->x86_64 && X86_VEX_W(b2)) + /* VEX.W overrides opnd_size */ + insn->opnd_bytes = 8; + } else { + /* + * For VEX2, fake VEX3-like byte#2. + * Makes it easier to decode vex.W, vex.vvvv, + * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0. + */ + insn->vex_prefix.bytes[2] = b2 & 0x7f; + insn->vex_prefix.nbytes = 2; + insn->next_byte += 2; + } + } +vex_end: + insn->vex_prefix.got = 1; + + prefixes->got = 1; + +err_out: + return; +} + +/** + * insn_get_opcode - collect opcode(s) + * @insn: &struct insn containing instruction + * + * Populates @insn->opcode, updates @insn->next_byte to point past the + * opcode byte(s), and set @insn->attr (except for groups). + * If necessary, first collects any preceding (prefix) bytes. + * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got + * is already 1. + */ +void insn_get_opcode(struct insn *insn) +{ + struct insn_field *opcode = &insn->opcode; + insn_byte_t op; + int pfx_id; + if (opcode->got) + return; + if (!insn->prefixes.got) + insn_get_prefixes(insn); + + /* Get first opcode */ + op = get_next(insn_byte_t, insn); + opcode->bytes[0] = op; + opcode->nbytes = 1; + + /* Check if there is VEX prefix or not */ + if (insn_is_avx(insn)) { + insn_byte_t m, p; + m = insn_vex_m_bits(insn); + p = insn_vex_p_bits(insn); + insn->attr = inat_get_avx_attribute(op, m, p); + if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr)) + insn->attr = 0; /* This instruction is bad */ + goto end; /* VEX has only 1 byte for opcode */ + } + + insn->attr = inat_get_opcode_attribute(op); + while (inat_is_escape(insn->attr)) { + /* Get escaped opcode */ + op = get_next(insn_byte_t, insn); + opcode->bytes[opcode->nbytes++] = op; + pfx_id = insn_last_prefix_id(insn); + insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr); + } + if (inat_must_vex(insn->attr)) + insn->attr = 0; /* This instruction is bad */ +end: + opcode->got = 1; + +err_out: + return; +} + +/** + * insn_get_modrm - collect ModRM byte, if any + * @insn: &struct insn containing instruction + * + * Populates @insn->modrm and updates @insn->next_byte to point past the + * ModRM byte, if any. If necessary, first collects the preceding bytes + * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. + */ +void insn_get_modrm(struct insn *insn) +{ + struct insn_field *modrm = &insn->modrm; + insn_byte_t pfx_id, mod; + if (modrm->got) + return; + if (!insn->opcode.got) + insn_get_opcode(insn); + + if (inat_has_modrm(insn->attr)) { + mod = get_next(insn_byte_t, insn); + modrm->value = mod; + modrm->nbytes = 1; + if (inat_is_group(insn->attr)) { + pfx_id = insn_last_prefix_id(insn); + insn->attr = inat_get_group_attribute(mod, pfx_id, + insn->attr); + if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) + insn->attr = 0; /* This is bad */ + } + } + + if (insn->x86_64 && inat_is_force64(insn->attr)) + insn->opnd_bytes = 8; + modrm->got = 1; + +err_out: + return; +} + + +/** + * insn_rip_relative() - Does instruction use RIP-relative addressing mode? + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * ModRM byte. No effect if @insn->x86_64 is 0. + */ +int insn_rip_relative(struct insn *insn) +{ + struct insn_field *modrm = &insn->modrm; + + if (!insn->x86_64) + return 0; + if (!modrm->got) + insn_get_modrm(insn); + /* + * For rip-relative instructions, the mod field (top 2 bits) + * is zero and the r/m field (bottom 3 bits) is 0x5. + */ + return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); +} + +/** + * insn_get_sib() - Get the SIB byte of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * ModRM byte. + */ +void insn_get_sib(struct insn *insn) +{ + insn_byte_t modrm; + + if (insn->sib.got) + return; + if (!insn->modrm.got) + insn_get_modrm(insn); + if (insn->modrm.nbytes) { + modrm = (insn_byte_t)insn->modrm.value; + if (insn->addr_bytes != 2 && + X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { + insn->sib.value = get_next(insn_byte_t, insn); + insn->sib.nbytes = 1; + } + } + insn->sib.got = 1; + +err_out: + return; +} + + +/** + * insn_get_displacement() - Get the displacement of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * SIB byte. + * Displacement value is sign-expanded. + */ +void insn_get_displacement(struct insn *insn) +{ + insn_byte_t mod, rm, base; + + if (insn->displacement.got) + return; + if (!insn->sib.got) + insn_get_sib(insn); + if (insn->modrm.nbytes) { + /* + * Interpreting the modrm byte: + * mod = 00 - no displacement fields (exceptions below) + * mod = 01 - 1-byte displacement field + * mod = 10 - displacement field is 4 bytes, or 2 bytes if + * address size = 2 (0x67 prefix in 32-bit mode) + * mod = 11 - no memory operand + * + * If address size = 2... + * mod = 00, r/m = 110 - displacement field is 2 bytes + * + * If address size != 2... + * mod != 11, r/m = 100 - SIB byte exists + * mod = 00, SIB base = 101 - displacement field is 4 bytes + * mod = 00, r/m = 101 - rip-relative addressing, displacement + * field is 4 bytes + */ + mod = X86_MODRM_MOD(insn->modrm.value); + rm = X86_MODRM_RM(insn->modrm.value); + base = X86_SIB_BASE(insn->sib.value); + if (mod == 3) + goto out; + if (mod == 1) { + insn->displacement.value = get_next(char, insn); + insn->displacement.nbytes = 1; + } else if (insn->addr_bytes == 2) { + if ((mod == 0 && rm == 6) || mod == 2) { + insn->displacement.value = + get_next(short, insn); + insn->displacement.nbytes = 2; + } + } else { + if ((mod == 0 && rm == 5) || mod == 2 || + (mod == 0 && base == 5)) { + insn->displacement.value = get_next(int, insn); + insn->displacement.nbytes = 4; + } + } + } +out: + insn->displacement.got = 1; + +err_out: + return; +} + +/* Decode moffset16/32/64. Return 0 if failed */ +static int __get_moffset(struct insn *insn) +{ + switch (insn->addr_bytes) { + case 2: + insn->moffset1.value = get_next(short, insn); + insn->moffset1.nbytes = 2; + break; + case 4: + insn->moffset1.value = get_next(int, insn); + insn->moffset1.nbytes = 4; + break; + case 8: + insn->moffset1.value = get_next(int, insn); + insn->moffset1.nbytes = 4; + insn->moffset2.value = get_next(int, insn); + insn->moffset2.nbytes = 4; + break; + default: /* opnd_bytes must be modified manually */ + goto err_out; + } + insn->moffset1.got = insn->moffset2.got = 1; + + return 1; + +err_out: + return 0; +} + +/* Decode imm v32(Iz). Return 0 if failed */ +static int __get_immv32(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate.value = get_next(short, insn); + insn->immediate.nbytes = 2; + break; + case 4: + case 8: + insn->immediate.value = get_next(int, insn); + insn->immediate.nbytes = 4; + break; + default: /* opnd_bytes must be modified manually */ + goto err_out; + } + + return 1; + +err_out: + return 0; +} + +/* Decode imm v64(Iv/Ov), Return 0 if failed */ +static int __get_immv(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate1.value = get_next(short, insn); + insn->immediate1.nbytes = 2; + break; + case 4: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + break; + case 8: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + insn->immediate2.value = get_next(int, insn); + insn->immediate2.nbytes = 4; + break; + default: /* opnd_bytes must be modified manually */ + goto err_out; + } + insn->immediate1.got = insn->immediate2.got = 1; + + return 1; +err_out: + return 0; +} + +/* Decode ptr16:16/32(Ap) */ +static int __get_immptr(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate1.value = get_next(short, insn); + insn->immediate1.nbytes = 2; + break; + case 4: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + break; + case 8: + /* ptr16:64 is not exist (no segment) */ + return 0; + default: /* opnd_bytes must be modified manually */ + goto err_out; + } + insn->immediate2.value = get_next(unsigned short, insn); + insn->immediate2.nbytes = 2; + insn->immediate1.got = insn->immediate2.got = 1; + + return 1; +err_out: + return 0; +} + +/** + * insn_get_immediate() - Get the immediates of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * displacement bytes. + * Basically, most of immediates are sign-expanded. Unsigned-value can be + * get by bit masking with ((1 << (nbytes * 8)) - 1) + */ +void insn_get_immediate(struct insn *insn) +{ + if (insn->immediate.got) + return; + if (!insn->displacement.got) + insn_get_displacement(insn); + + if (inat_has_moffset(insn->attr)) { + if (!__get_moffset(insn)) + goto err_out; + goto done; + } + + if (!inat_has_immediate(insn->attr)) + /* no immediates */ + goto done; + + switch (inat_immediate_size(insn->attr)) { + case INAT_IMM_BYTE: + insn->immediate.value = get_next(char, insn); + insn->immediate.nbytes = 1; + break; + case INAT_IMM_WORD: + insn->immediate.value = get_next(short, insn); + insn->immediate.nbytes = 2; + break; + case INAT_IMM_DWORD: + insn->immediate.value = get_next(int, insn); + insn->immediate.nbytes = 4; + break; + case INAT_IMM_QWORD: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + insn->immediate2.value = get_next(int, insn); + insn->immediate2.nbytes = 4; + break; + case INAT_IMM_PTR: + if (!__get_immptr(insn)) + goto err_out; + break; + case INAT_IMM_VWORD32: + if (!__get_immv32(insn)) + goto err_out; + break; + case INAT_IMM_VWORD: + if (!__get_immv(insn)) + goto err_out; + break; + default: + /* Here, insn must have an immediate, but failed */ + goto err_out; + } + if (inat_has_second_immediate(insn->attr)) { + insn->immediate2.value = get_next(char, insn); + insn->immediate2.nbytes = 1; + } +done: + insn->immediate.got = 1; + +err_out: + return; +} + +/** + * insn_get_length() - Get the length of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * immediates bytes. + */ +void insn_get_length(struct insn *insn) +{ + if (insn->length) + return; + if (!insn->immediate.got) + insn_get_immediate(insn); + insn->length = (unsigned char)((unsigned long)insn->next_byte + - (unsigned long)insn->kaddr); +} diff --git a/tools/perf/util/intel-pt-decoder/insn.h b/tools/perf/util/intel-pt-decoder/insn.h new file mode 100644 index 000000000000..e7814b74caf8 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/insn.h @@ -0,0 +1,201 @@ +#ifndef _ASM_X86_INSN_H +#define _ASM_X86_INSN_H +/* + * x86 instruction analysis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2009 + */ + +/* insn_attr_t is defined in inat.h */ +#include <asm/inat.h> + +struct insn_field { + union { + insn_value_t value; + insn_byte_t bytes[4]; + }; + /* !0 if we've run insn_get_xxx() for this field */ + unsigned char got; + unsigned char nbytes; +}; + +struct insn { + struct insn_field prefixes; /* + * Prefixes + * prefixes.bytes[3]: last prefix + */ + struct insn_field rex_prefix; /* REX prefix */ + struct insn_field vex_prefix; /* VEX prefix */ + struct insn_field opcode; /* + * opcode.bytes[0]: opcode1 + * opcode.bytes[1]: opcode2 + * opcode.bytes[2]: opcode3 + */ + struct insn_field modrm; + struct insn_field sib; + struct insn_field displacement; + union { + struct insn_field immediate; + struct insn_field moffset1; /* for 64bit MOV */ + struct insn_field immediate1; /* for 64bit imm or off16/32 */ + }; + union { + struct insn_field moffset2; /* for 64bit MOV */ + struct insn_field immediate2; /* for 64bit imm or seg16 */ + }; + + insn_attr_t attr; + unsigned char opnd_bytes; + unsigned char addr_bytes; + unsigned char length; + unsigned char x86_64; + + const insn_byte_t *kaddr; /* kernel address of insn to analyze */ + const insn_byte_t *end_kaddr; /* kernel address of last insn in buffer */ + const insn_byte_t *next_byte; +}; + +#define MAX_INSN_SIZE 15 + +#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) +#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) +#define X86_MODRM_RM(modrm) ((modrm) & 0x07) + +#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6) +#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3) +#define X86_SIB_BASE(sib) ((sib) & 0x07) + +#define X86_REX_W(rex) ((rex) & 8) +#define X86_REX_R(rex) ((rex) & 4) +#define X86_REX_X(rex) ((rex) & 2) +#define X86_REX_B(rex) ((rex) & 1) + +/* VEX bit flags */ +#define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */ +#define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */ +#define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */ +#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ +#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ +/* VEX bit fields */ +#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ +#define X86_VEX2_M 1 /* VEX2.M always 1 */ +#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ +#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ +#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ + +extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64); +extern void insn_get_prefixes(struct insn *insn); +extern void insn_get_opcode(struct insn *insn); +extern void insn_get_modrm(struct insn *insn); +extern void insn_get_sib(struct insn *insn); +extern void insn_get_displacement(struct insn *insn); +extern void insn_get_immediate(struct insn *insn); +extern void insn_get_length(struct insn *insn); + +/* Attribute will be determined after getting ModRM (for opcode groups) */ +static inline void insn_get_attribute(struct insn *insn) +{ + insn_get_modrm(insn); +} + +/* Instruction uses RIP-relative addressing */ +extern int insn_rip_relative(struct insn *insn); + +/* Init insn for kernel text */ +static inline void kernel_insn_init(struct insn *insn, + const void *kaddr, int buf_len) +{ +#ifdef CONFIG_X86_64 + insn_init(insn, kaddr, buf_len, 1); +#else /* CONFIG_X86_32 */ + insn_init(insn, kaddr, buf_len, 0); +#endif +} + +static inline int insn_is_avx(struct insn *insn) +{ + if (!insn->prefixes.got) + insn_get_prefixes(insn); + return (insn->vex_prefix.value != 0); +} + +/* Ensure this instruction is decoded completely */ +static inline int insn_complete(struct insn *insn) +{ + return insn->opcode.got && insn->modrm.got && insn->sib.got && + insn->displacement.got && insn->immediate.got; +} + +static inline insn_byte_t insn_vex_m_bits(struct insn *insn) +{ + if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ + return X86_VEX2_M; + else + return X86_VEX3_M(insn->vex_prefix.bytes[1]); +} + +static inline insn_byte_t insn_vex_p_bits(struct insn *insn) +{ + if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ + return X86_VEX_P(insn->vex_prefix.bytes[1]); + else + return X86_VEX_P(insn->vex_prefix.bytes[2]); +} + +/* Get the last prefix id from last prefix or VEX prefix */ +static inline int insn_last_prefix_id(struct insn *insn) +{ + if (insn_is_avx(insn)) + return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */ + + if (insn->prefixes.bytes[3]) + return inat_get_last_prefix_id(insn->prefixes.bytes[3]); + + return 0; +} + +/* Offset of each field from kaddr */ +static inline int insn_offset_rex_prefix(struct insn *insn) +{ + return insn->prefixes.nbytes; +} +static inline int insn_offset_vex_prefix(struct insn *insn) +{ + return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes; +} +static inline int insn_offset_opcode(struct insn *insn) +{ + return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes; +} +static inline int insn_offset_modrm(struct insn *insn) +{ + return insn_offset_opcode(insn) + insn->opcode.nbytes; +} +static inline int insn_offset_sib(struct insn *insn) +{ + return insn_offset_modrm(insn) + insn->modrm.nbytes; +} +static inline int insn_offset_displacement(struct insn *insn) +{ + return insn_offset_sib(insn) + insn->sib.nbytes; +} +static inline int insn_offset_immediate(struct insn *insn) +{ + return insn_offset_displacement(insn) + insn->displacement.nbytes; +} + +#endif /* _ASM_X86_INSN_H */ diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c new file mode 100644 index 000000000000..f8ac462fec1a --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -0,0 +1,1816 @@ +/* + * intel_pt_decoder.c: Intel Processor Trace support + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> +#include <errno.h> +#include <stdint.h> +#include <inttypes.h> + +#include "../cache.h" +#include "../util.h" + +#include "intel-pt-insn-decoder.h" +#include "intel-pt-pkt-decoder.h" +#include "intel-pt-decoder.h" +#include "intel-pt-log.h" + +#define INTEL_PT_BLK_SIZE 1024 + +#define BIT63 (((uint64_t)1 << 63)) + +#define INTEL_PT_RETURN 1 + +/* Maximum number of loops with no packets consumed i.e. stuck in a loop */ +#define INTEL_PT_MAX_LOOPS 10000 + +struct intel_pt_blk { + struct intel_pt_blk *prev; + uint64_t ip[INTEL_PT_BLK_SIZE]; +}; + +struct intel_pt_stack { + struct intel_pt_blk *blk; + struct intel_pt_blk *spare; + int pos; +}; + +enum intel_pt_pkt_state { + INTEL_PT_STATE_NO_PSB, + INTEL_PT_STATE_NO_IP, + INTEL_PT_STATE_ERR_RESYNC, + INTEL_PT_STATE_IN_SYNC, + INTEL_PT_STATE_TNT, + INTEL_PT_STATE_TIP, + INTEL_PT_STATE_TIP_PGD, + INTEL_PT_STATE_FUP, + INTEL_PT_STATE_FUP_NO_TIP, +}; + +#ifdef INTEL_PT_STRICT +#define INTEL_PT_STATE_ERR1 INTEL_PT_STATE_NO_PSB +#define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_PSB +#define INTEL_PT_STATE_ERR3 INTEL_PT_STATE_NO_PSB +#define INTEL_PT_STATE_ERR4 INTEL_PT_STATE_NO_PSB +#else +#define INTEL_PT_STATE_ERR1 (decoder->pkt_state) +#define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_IP +#define INTEL_PT_STATE_ERR3 INTEL_PT_STATE_ERR_RESYNC +#define INTEL_PT_STATE_ERR4 INTEL_PT_STATE_IN_SYNC +#endif + +struct intel_pt_decoder { + int (*get_trace)(struct intel_pt_buffer *buffer, void *data); + int (*walk_insn)(struct intel_pt_insn *intel_pt_insn, + uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, + uint64_t max_insn_cnt, void *data); + void *data; + struct intel_pt_state state; + const unsigned char *buf; + size_t len; + bool return_compression; + bool pge; + uint64_t pos; + uint64_t last_ip; + uint64_t ip; + uint64_t cr3; + uint64_t timestamp; + uint64_t tsc_timestamp; + uint64_t ref_timestamp; + uint64_t ret_addr; + struct intel_pt_stack stack; + enum intel_pt_pkt_state pkt_state; + struct intel_pt_pkt packet; + struct intel_pt_pkt tnt; + int pkt_step; + int pkt_len; + unsigned int cbr; + unsigned int max_non_turbo_ratio; + int exec_mode; + unsigned int insn_bytes; + uint64_t sign_bit; + uint64_t sign_bits; + uint64_t period; + enum intel_pt_period_type period_type; + uint64_t period_insn_cnt; + uint64_t period_mask; + uint64_t period_ticks; + uint64_t last_masked_timestamp; + bool continuous_period; + bool overflow; + bool set_fup_tx_flags; + unsigned int fup_tx_flags; + unsigned int tx_flags; + uint64_t timestamp_insn_cnt; + uint64_t stuck_ip; + int no_progress; + int stuck_ip_prd; + int stuck_ip_cnt; + const unsigned char *next_buf; + size_t next_len; + unsigned char temp_buf[INTEL_PT_PKT_MAX_SZ]; +}; + +static uint64_t intel_pt_lower_power_of_2(uint64_t x) +{ + int i; + + for (i = 0; x != 1; i++) + x >>= 1; + + return x << i; +} + +static void intel_pt_setup_period(struct intel_pt_decoder *decoder) +{ + if (decoder->period_type == INTEL_PT_PERIOD_TICKS) { + uint64_t period; + + period = intel_pt_lower_power_of_2(decoder->period); + decoder->period_mask = ~(period - 1); + decoder->period_ticks = period; + } +} + +struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) +{ + struct intel_pt_decoder *decoder; + + if (!params->get_trace || !params->walk_insn) + return NULL; + + decoder = zalloc(sizeof(struct intel_pt_decoder)); + if (!decoder) + return NULL; + + decoder->get_trace = params->get_trace; + decoder->walk_insn = params->walk_insn; + decoder->data = params->data; + decoder->return_compression = params->return_compression; + + decoder->sign_bit = (uint64_t)1 << 47; + decoder->sign_bits = ~(((uint64_t)1 << 48) - 1); + + decoder->period = params->period; + decoder->period_type = params->period_type; + + decoder->max_non_turbo_ratio = params->max_non_turbo_ratio; + + intel_pt_setup_period(decoder); + + return decoder; +} + +static void intel_pt_pop_blk(struct intel_pt_stack *stack) +{ + struct intel_pt_blk *blk = stack->blk; + + stack->blk = blk->prev; + if (!stack->spare) + stack->spare = blk; + else + free(blk); +} + +static uint64_t intel_pt_pop(struct intel_pt_stack *stack) +{ + if (!stack->pos) { + if (!stack->blk) + return 0; + intel_pt_pop_blk(stack); + if (!stack->blk) + return 0; + stack->pos = INTEL_PT_BLK_SIZE; + } + return stack->blk->ip[--stack->pos]; +} + +static int intel_pt_alloc_blk(struct intel_pt_stack *stack) +{ + struct intel_pt_blk *blk; + + if (stack->spare) { + blk = stack->spare; + stack->spare = NULL; + } else { + blk = malloc(sizeof(struct intel_pt_blk)); + if (!blk) + return -ENOMEM; + } + + blk->prev = stack->blk; + stack->blk = blk; + stack->pos = 0; + return 0; +} + +static int intel_pt_push(struct intel_pt_stack *stack, uint64_t ip) +{ + int err; + + if (!stack->blk || stack->pos == INTEL_PT_BLK_SIZE) { + err = intel_pt_alloc_blk(stack); + if (err) + return err; + } + + stack->blk->ip[stack->pos++] = ip; + return 0; +} + +static void intel_pt_clear_stack(struct intel_pt_stack *stack) +{ + while (stack->blk) + intel_pt_pop_blk(stack); + stack->pos = 0; +} + +static void intel_pt_free_stack(struct intel_pt_stack *stack) +{ + intel_pt_clear_stack(stack); + zfree(&stack->blk); + zfree(&stack->spare); +} + +void intel_pt_decoder_free(struct intel_pt_decoder *decoder) +{ + intel_pt_free_stack(&decoder->stack); + free(decoder); +} + +static int intel_pt_ext_err(int code) +{ + switch (code) { + case -ENOMEM: + return INTEL_PT_ERR_NOMEM; + case -ENOSYS: + return INTEL_PT_ERR_INTERN; + case -EBADMSG: + return INTEL_PT_ERR_BADPKT; + case -ENODATA: + return INTEL_PT_ERR_NODATA; + case -EILSEQ: + return INTEL_PT_ERR_NOINSN; + case -ENOENT: + return INTEL_PT_ERR_MISMAT; + case -EOVERFLOW: + return INTEL_PT_ERR_OVR; + case -ENOSPC: + return INTEL_PT_ERR_LOST; + case -ELOOP: + return INTEL_PT_ERR_NELOOP; + default: + return INTEL_PT_ERR_UNK; + } +} + +static const char *intel_pt_err_msgs[] = { + [INTEL_PT_ERR_NOMEM] = "Memory allocation failed", + [INTEL_PT_ERR_INTERN] = "Internal error", + [INTEL_PT_ERR_BADPKT] = "Bad packet", + [INTEL_PT_ERR_NODATA] = "No more data", + [INTEL_PT_ERR_NOINSN] = "Failed to get instruction", + [INTEL_PT_ERR_MISMAT] = "Trace doesn't match instruction", + [INTEL_PT_ERR_OVR] = "Overflow packet", + [INTEL_PT_ERR_LOST] = "Lost trace data", + [INTEL_PT_ERR_UNK] = "Unknown error!", + [INTEL_PT_ERR_NELOOP] = "Never-ending loop", +}; + +int intel_pt__strerror(int code, char *buf, size_t buflen) +{ + if (code < 1 || code > INTEL_PT_ERR_MAX) + code = INTEL_PT_ERR_UNK; + strlcpy(buf, intel_pt_err_msgs[code], buflen); + return 0; +} + +static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder, + const struct intel_pt_pkt *packet, + uint64_t last_ip) +{ + uint64_t ip; + + switch (packet->count) { + case 2: + ip = (last_ip & (uint64_t)0xffffffffffff0000ULL) | + packet->payload; + break; + case 4: + ip = (last_ip & (uint64_t)0xffffffff00000000ULL) | + packet->payload; + break; + case 6: + ip = packet->payload; + break; + default: + return 0; + } + + if (ip & decoder->sign_bit) + return ip | decoder->sign_bits; + + return ip; +} + +static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder) +{ + decoder->last_ip = intel_pt_calc_ip(decoder, &decoder->packet, + decoder->last_ip); +} + +static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder) +{ + intel_pt_set_last_ip(decoder); + decoder->ip = decoder->last_ip; +} + +static void intel_pt_decoder_log_packet(struct intel_pt_decoder *decoder) +{ + intel_pt_log_packet(&decoder->packet, decoder->pkt_len, decoder->pos, + decoder->buf); +} + +static int intel_pt_bug(struct intel_pt_decoder *decoder) +{ + intel_pt_log("ERROR: Internal error\n"); + decoder->pkt_state = INTEL_PT_STATE_NO_PSB; + return -ENOSYS; +} + +static inline void intel_pt_clear_tx_flags(struct intel_pt_decoder *decoder) +{ + decoder->tx_flags = 0; +} + +static inline void intel_pt_update_in_tx(struct intel_pt_decoder *decoder) +{ + decoder->tx_flags = decoder->packet.payload & INTEL_PT_IN_TX; +} + +static int intel_pt_bad_packet(struct intel_pt_decoder *decoder) +{ + intel_pt_clear_tx_flags(decoder); + decoder->pkt_len = 1; + decoder->pkt_step = 1; + intel_pt_decoder_log_packet(decoder); + if (decoder->pkt_state != INTEL_PT_STATE_NO_PSB) { + intel_pt_log("ERROR: Bad packet\n"); + decoder->pkt_state = INTEL_PT_STATE_ERR1; + } + return -EBADMSG; +} + +static int intel_pt_get_data(struct intel_pt_decoder *decoder) +{ + struct intel_pt_buffer buffer = { .buf = 0, }; + int ret; + + decoder->pkt_step = 0; + + intel_pt_log("Getting more data\n"); + ret = decoder->get_trace(&buffer, decoder->data); + if (ret) + return ret; + decoder->buf = buffer.buf; + decoder->len = buffer.len; + if (!decoder->len) { + intel_pt_log("No more data\n"); + return -ENODATA; + } + if (!buffer.consecutive) { + decoder->ip = 0; + decoder->pkt_state = INTEL_PT_STATE_NO_PSB; + decoder->ref_timestamp = buffer.ref_timestamp; + decoder->timestamp = 0; + decoder->state.trace_nr = buffer.trace_nr; + intel_pt_log("Reference timestamp 0x%" PRIx64 "\n", + decoder->ref_timestamp); + return -ENOLINK; + } + + return 0; +} + +static int intel_pt_get_next_data(struct intel_pt_decoder *decoder) +{ + if (!decoder->next_buf) + return intel_pt_get_data(decoder); + + decoder->buf = decoder->next_buf; + decoder->len = decoder->next_len; + decoder->next_buf = 0; + decoder->next_len = 0; + return 0; +} + +static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder) +{ + unsigned char *buf = decoder->temp_buf; + size_t old_len, len, n; + int ret; + + old_len = decoder->len; + len = decoder->len; + memcpy(buf, decoder->buf, len); + + ret = intel_pt_get_data(decoder); + if (ret) { + decoder->pos += old_len; + return ret < 0 ? ret : -EINVAL; + } + + n = INTEL_PT_PKT_MAX_SZ - len; + if (n > decoder->len) + n = decoder->len; + memcpy(buf + len, decoder->buf, n); + len += n; + + ret = intel_pt_get_packet(buf, len, &decoder->packet); + if (ret < (int)old_len) { + decoder->next_buf = decoder->buf; + decoder->next_len = decoder->len; + decoder->buf = buf; + decoder->len = old_len; + return intel_pt_bad_packet(decoder); + } + + decoder->next_buf = decoder->buf + (ret - old_len); + decoder->next_len = decoder->len - (ret - old_len); + + decoder->buf = buf; + decoder->len = ret; + + return ret; +} + +static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder) +{ + int ret; + + do { + decoder->pos += decoder->pkt_step; + decoder->buf += decoder->pkt_step; + decoder->len -= decoder->pkt_step; + + if (!decoder->len) { + ret = intel_pt_get_next_data(decoder); + if (ret) + return ret; + } + + ret = intel_pt_get_packet(decoder->buf, decoder->len, + &decoder->packet); + if (ret == INTEL_PT_NEED_MORE_BYTES && + decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) { + ret = intel_pt_get_split_packet(decoder); + if (ret < 0) + return ret; + } + if (ret <= 0) + return intel_pt_bad_packet(decoder); + + decoder->pkt_len = ret; + decoder->pkt_step = ret; + intel_pt_decoder_log_packet(decoder); + } while (decoder->packet.type == INTEL_PT_PAD); + + return 0; +} + +static uint64_t intel_pt_next_period(struct intel_pt_decoder *decoder) +{ + uint64_t timestamp, masked_timestamp; + + timestamp = decoder->timestamp + decoder->timestamp_insn_cnt; + masked_timestamp = timestamp & decoder->period_mask; + if (decoder->continuous_period) { + if (masked_timestamp != decoder->last_masked_timestamp) + return 1; + } else { + timestamp += 1; + masked_timestamp = timestamp & decoder->period_mask; + if (masked_timestamp != decoder->last_masked_timestamp) { + decoder->last_masked_timestamp = masked_timestamp; + decoder->continuous_period = true; + } + } + return decoder->period_ticks - (timestamp - masked_timestamp); +} + +static uint64_t intel_pt_next_sample(struct intel_pt_decoder *decoder) +{ + switch (decoder->period_type) { + case INTEL_PT_PERIOD_INSTRUCTIONS: + return decoder->period - decoder->period_insn_cnt; + case INTEL_PT_PERIOD_TICKS: + return intel_pt_next_period(decoder); + case INTEL_PT_PERIOD_NONE: + default: + return 0; + } +} + +static void intel_pt_sample_insn(struct intel_pt_decoder *decoder) +{ + uint64_t timestamp, masked_timestamp; + + switch (decoder->period_type) { + case INTEL_PT_PERIOD_INSTRUCTIONS: + decoder->period_insn_cnt = 0; + break; + case INTEL_PT_PERIOD_TICKS: + timestamp = decoder->timestamp + decoder->timestamp_insn_cnt; + masked_timestamp = timestamp & decoder->period_mask; + decoder->last_masked_timestamp = masked_timestamp; + break; + case INTEL_PT_PERIOD_NONE: + default: + break; + } + + decoder->state.type |= INTEL_PT_INSTRUCTION; +} + +static int intel_pt_walk_insn(struct intel_pt_decoder *decoder, + struct intel_pt_insn *intel_pt_insn, uint64_t ip) +{ + uint64_t max_insn_cnt, insn_cnt = 0; + int err; + + max_insn_cnt = intel_pt_next_sample(decoder); + + err = decoder->walk_insn(intel_pt_insn, &insn_cnt, &decoder->ip, ip, + max_insn_cnt, decoder->data); + + decoder->timestamp_insn_cnt += insn_cnt; + decoder->period_insn_cnt += insn_cnt; + + if (err) { + decoder->no_progress = 0; + decoder->pkt_state = INTEL_PT_STATE_ERR2; + intel_pt_log_at("ERROR: Failed to get instruction", + decoder->ip); + if (err == -ENOENT) + return -ENOLINK; + return -EILSEQ; + } + + if (ip && decoder->ip == ip) { + err = -EAGAIN; + goto out; + } + + if (max_insn_cnt && insn_cnt >= max_insn_cnt) + intel_pt_sample_insn(decoder); + + if (intel_pt_insn->branch == INTEL_PT_BR_NO_BRANCH) { + decoder->state.type = INTEL_PT_INSTRUCTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->ip += intel_pt_insn->length; + err = INTEL_PT_RETURN; + goto out; + } + + if (intel_pt_insn->op == INTEL_PT_OP_CALL) { + /* Zero-length calls are excluded */ + if (intel_pt_insn->branch != INTEL_PT_BR_UNCONDITIONAL || + intel_pt_insn->rel) { + err = intel_pt_push(&decoder->stack, decoder->ip + + intel_pt_insn->length); + if (err) + goto out; + } + } else if (intel_pt_insn->op == INTEL_PT_OP_RET) { + decoder->ret_addr = intel_pt_pop(&decoder->stack); + } + + if (intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL) { + int cnt = decoder->no_progress++; + + decoder->state.from_ip = decoder->ip; + decoder->ip += intel_pt_insn->length + + intel_pt_insn->rel; + decoder->state.to_ip = decoder->ip; + err = INTEL_PT_RETURN; + + /* + * Check for being stuck in a loop. This can happen if a + * decoder error results in the decoder erroneously setting the + * ip to an address that is itself in an infinite loop that + * consumes no packets. When that happens, there must be an + * unconditional branch. + */ + if (cnt) { + if (cnt == 1) { + decoder->stuck_ip = decoder->state.to_ip; + decoder->stuck_ip_prd = 1; + decoder->stuck_ip_cnt = 1; + } else if (cnt > INTEL_PT_MAX_LOOPS || + decoder->state.to_ip == decoder->stuck_ip) { + intel_pt_log_at("ERROR: Never-ending loop", + decoder->state.to_ip); + decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; + err = -ELOOP; + goto out; + } else if (!--decoder->stuck_ip_cnt) { + decoder->stuck_ip_prd += 1; + decoder->stuck_ip_cnt = decoder->stuck_ip_prd; + decoder->stuck_ip = decoder->state.to_ip; + } + } + goto out_no_progress; + } +out: + decoder->no_progress = 0; +out_no_progress: + decoder->state.insn_op = intel_pt_insn->op; + decoder->state.insn_len = intel_pt_insn->length; + + if (decoder->tx_flags & INTEL_PT_IN_TX) + decoder->state.flags |= INTEL_PT_IN_TX; + + return err; +} + +static int intel_pt_walk_fup(struct intel_pt_decoder *decoder) +{ + struct intel_pt_insn intel_pt_insn; + uint64_t ip; + int err; + + ip = decoder->last_ip; + + while (1) { + err = intel_pt_walk_insn(decoder, &intel_pt_insn, ip); + if (err == INTEL_PT_RETURN) + return 0; + if (err == -EAGAIN) { + if (decoder->set_fup_tx_flags) { + decoder->set_fup_tx_flags = false; + decoder->tx_flags = decoder->fup_tx_flags; + decoder->state.type = INTEL_PT_TRANSACTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->state.flags = decoder->fup_tx_flags; + return 0; + } + return err; + } + decoder->set_fup_tx_flags = false; + if (err) + return err; + + if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) { + intel_pt_log_at("ERROR: Unexpected indirect branch", + decoder->ip); + decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; + return -ENOENT; + } + + if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) { + intel_pt_log_at("ERROR: Unexpected conditional branch", + decoder->ip); + decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; + return -ENOENT; + } + + intel_pt_bug(decoder); + } +} + +static int intel_pt_walk_tip(struct intel_pt_decoder *decoder) +{ + struct intel_pt_insn intel_pt_insn; + int err; + + err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0); + if (err == INTEL_PT_RETURN) + return 0; + if (err) + return err; + + if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) { + if (decoder->pkt_state == INTEL_PT_STATE_TIP_PGD) { + decoder->pge = false; + decoder->continuous_period = false; + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + if (decoder->packet.count != 0) + decoder->ip = decoder->last_ip; + } else { + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + decoder->state.from_ip = decoder->ip; + if (decoder->packet.count == 0) { + decoder->state.to_ip = 0; + } else { + decoder->state.to_ip = decoder->last_ip; + decoder->ip = decoder->last_ip; + } + } + return 0; + } + + if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) { + intel_pt_log_at("ERROR: Conditional branch when expecting indirect branch", + decoder->ip); + decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; + return -ENOENT; + } + + return intel_pt_bug(decoder); +} + +static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder) +{ + struct intel_pt_insn intel_pt_insn; + int err; + + while (1) { + err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0); + if (err == INTEL_PT_RETURN) + return 0; + if (err) + return err; + + if (intel_pt_insn.op == INTEL_PT_OP_RET) { + if (!decoder->return_compression) { + intel_pt_log_at("ERROR: RET when expecting conditional branch", + decoder->ip); + decoder->pkt_state = INTEL_PT_STATE_ERR3; + return -ENOENT; + } + if (!decoder->ret_addr) { + intel_pt_log_at("ERROR: Bad RET compression (stack empty)", + decoder->ip); + decoder->pkt_state = INTEL_PT_STATE_ERR3; + return -ENOENT; + } + if (!(decoder->tnt.payload & BIT63)) { + intel_pt_log_at("ERROR: Bad RET compression (TNT=N)", + decoder->ip); + decoder->pkt_state = INTEL_PT_STATE_ERR3; + return -ENOENT; + } + decoder->tnt.count -= 1; + if (!decoder->tnt.count) + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + decoder->tnt.payload <<= 1; + decoder->state.from_ip = decoder->ip; + decoder->ip = decoder->ret_addr; + decoder->state.to_ip = decoder->ip; + return 0; + } + + if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) { + /* Handle deferred TIPs */ + err = intel_pt_get_next_packet(decoder); + if (err) + return err; + if (decoder->packet.type != INTEL_PT_TIP || + decoder->packet.count == 0) { + intel_pt_log_at("ERROR: Missing deferred TIP for indirect branch", + decoder->ip); + decoder->pkt_state = INTEL_PT_STATE_ERR3; + decoder->pkt_step = 0; + return -ENOENT; + } + intel_pt_set_last_ip(decoder); + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = decoder->last_ip; + decoder->ip = decoder->last_ip; + return 0; + } + + if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) { + decoder->tnt.count -= 1; + if (!decoder->tnt.count) + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + if (decoder->tnt.payload & BIT63) { + decoder->tnt.payload <<= 1; + decoder->state.from_ip = decoder->ip; + decoder->ip += intel_pt_insn.length + + intel_pt_insn.rel; + decoder->state.to_ip = decoder->ip; + return 0; + } + /* Instruction sample for a non-taken branch */ + if (decoder->state.type & INTEL_PT_INSTRUCTION) { + decoder->tnt.payload <<= 1; + decoder->state.type = INTEL_PT_INSTRUCTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->ip += intel_pt_insn.length; + return 0; + } + decoder->ip += intel_pt_insn.length; + if (!decoder->tnt.count) + return -EAGAIN; + decoder->tnt.payload <<= 1; + continue; + } + + return intel_pt_bug(decoder); + } +} + +static int intel_pt_mode_tsx(struct intel_pt_decoder *decoder, bool *no_tip) +{ + unsigned int fup_tx_flags; + int err; + + fup_tx_flags = decoder->packet.payload & + (INTEL_PT_IN_TX | INTEL_PT_ABORT_TX); + err = intel_pt_get_next_packet(decoder); + if (err) + return err; + if (decoder->packet.type == INTEL_PT_FUP) { + decoder->fup_tx_flags = fup_tx_flags; + decoder->set_fup_tx_flags = true; + if (!(decoder->fup_tx_flags & INTEL_PT_ABORT_TX)) + *no_tip = true; + } else { + intel_pt_log_at("ERROR: Missing FUP after MODE.TSX", + decoder->pos); + intel_pt_update_in_tx(decoder); + } + return 0; +} + +static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder) +{ + uint64_t timestamp; + + if (decoder->ref_timestamp) { + timestamp = decoder->packet.payload | + (decoder->ref_timestamp & (0xffULL << 56)); + if (timestamp < decoder->ref_timestamp) { + if (decoder->ref_timestamp - timestamp > (1ULL << 55)) + timestamp += (1ULL << 56); + } else { + if (timestamp - decoder->ref_timestamp > (1ULL << 55)) + timestamp -= (1ULL << 56); + } + decoder->tsc_timestamp = timestamp; + decoder->timestamp = timestamp; + decoder->ref_timestamp = 0; + decoder->timestamp_insn_cnt = 0; + } else if (decoder->timestamp) { + timestamp = decoder->packet.payload | + (decoder->timestamp & (0xffULL << 56)); + if (timestamp < decoder->timestamp && + decoder->timestamp - timestamp < 0x100) { + intel_pt_log_to("ERROR: Suppressing backwards timestamp", + timestamp); + timestamp = decoder->timestamp; + } + while (timestamp < decoder->timestamp) { + intel_pt_log_to("Wraparound timestamp", timestamp); + timestamp += (1ULL << 56); + } + decoder->tsc_timestamp = timestamp; + decoder->timestamp = timestamp; + decoder->timestamp_insn_cnt = 0; + } + + intel_pt_log_to("Setting timestamp", decoder->timestamp); +} + +static int intel_pt_overflow(struct intel_pt_decoder *decoder) +{ + intel_pt_log("ERROR: Buffer overflow\n"); + intel_pt_clear_tx_flags(decoder); + decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; + decoder->overflow = true; + return -EOVERFLOW; +} + +/* Walk PSB+ packets when already in sync. */ +static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) +{ + int err; + + while (1) { + err = intel_pt_get_next_packet(decoder); + if (err) + return err; + + switch (decoder->packet.type) { + case INTEL_PT_PSBEND: + return 0; + + case INTEL_PT_TIP_PGD: + case INTEL_PT_TIP_PGE: + case INTEL_PT_TIP: + case INTEL_PT_TNT: + case INTEL_PT_BAD: + case INTEL_PT_PSB: + intel_pt_log("ERROR: Unexpected packet\n"); + return -EAGAIN; + + case INTEL_PT_OVF: + return intel_pt_overflow(decoder); + + case INTEL_PT_TSC: + intel_pt_calc_tsc_timestamp(decoder); + break; + + case INTEL_PT_CBR: + decoder->cbr = decoder->packet.payload; + break; + + case INTEL_PT_MODE_EXEC: + decoder->exec_mode = decoder->packet.payload; + break; + + case INTEL_PT_PIP: + decoder->cr3 = decoder->packet.payload; + break; + + case INTEL_PT_FUP: + decoder->pge = true; + intel_pt_set_last_ip(decoder); + break; + + case INTEL_PT_MODE_TSX: + intel_pt_update_in_tx(decoder); + break; + + case INTEL_PT_PAD: + default: + break; + } + } +} + +static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) +{ + int err; + + if (decoder->tx_flags & INTEL_PT_ABORT_TX) { + decoder->tx_flags = 0; + decoder->state.flags &= ~INTEL_PT_IN_TX; + decoder->state.flags |= INTEL_PT_ABORT_TX; + } else { + decoder->state.flags |= INTEL_PT_ASYNC; + } + + while (1) { + err = intel_pt_get_next_packet(decoder); + if (err) + return err; + + switch (decoder->packet.type) { + case INTEL_PT_TNT: + case INTEL_PT_FUP: + case INTEL_PT_PSB: + case INTEL_PT_TSC: + case INTEL_PT_CBR: + case INTEL_PT_MODE_TSX: + case INTEL_PT_BAD: + case INTEL_PT_PSBEND: + intel_pt_log("ERROR: Missing TIP after FUP\n"); + decoder->pkt_state = INTEL_PT_STATE_ERR3; + return -ENOENT; + + case INTEL_PT_OVF: + return intel_pt_overflow(decoder); + + case INTEL_PT_TIP_PGD: + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + if (decoder->packet.count != 0) { + intel_pt_set_ip(decoder); + intel_pt_log("Omitting PGD ip " x64_fmt "\n", + decoder->ip); + } + decoder->pge = false; + decoder->continuous_period = false; + return 0; + + case INTEL_PT_TIP_PGE: + decoder->pge = true; + intel_pt_log("Omitting PGE ip " x64_fmt "\n", + decoder->ip); + decoder->state.from_ip = 0; + if (decoder->packet.count == 0) { + decoder->state.to_ip = 0; + } else { + intel_pt_set_ip(decoder); + decoder->state.to_ip = decoder->ip; + } + return 0; + + case INTEL_PT_TIP: + decoder->state.from_ip = decoder->ip; + if (decoder->packet.count == 0) { + decoder->state.to_ip = 0; + } else { + intel_pt_set_ip(decoder); + decoder->state.to_ip = decoder->ip; + } + return 0; + + case INTEL_PT_PIP: + decoder->cr3 = decoder->packet.payload; + break; + + case INTEL_PT_MODE_EXEC: + decoder->exec_mode = decoder->packet.payload; + break; + + case INTEL_PT_PAD: + break; + + default: + return intel_pt_bug(decoder); + } + } +} + +static int intel_pt_walk_trace(struct intel_pt_decoder *decoder) +{ + bool no_tip = false; + int err; + + while (1) { + err = intel_pt_get_next_packet(decoder); + if (err) + return err; +next: + switch (decoder->packet.type) { + case INTEL_PT_TNT: + if (!decoder->packet.count) + break; + decoder->tnt = decoder->packet; + decoder->pkt_state = INTEL_PT_STATE_TNT; + err = intel_pt_walk_tnt(decoder); + if (err == -EAGAIN) + break; + return err; + + case INTEL_PT_TIP_PGD: + if (decoder->packet.count != 0) + intel_pt_set_last_ip(decoder); + decoder->pkt_state = INTEL_PT_STATE_TIP_PGD; + return intel_pt_walk_tip(decoder); + + case INTEL_PT_TIP_PGE: { + decoder->pge = true; + if (decoder->packet.count == 0) { + intel_pt_log_at("Skipping zero TIP.PGE", + decoder->pos); + break; + } + intel_pt_set_ip(decoder); + decoder->state.from_ip = 0; + decoder->state.to_ip = decoder->ip; + return 0; + } + + case INTEL_PT_OVF: + return intel_pt_overflow(decoder); + + case INTEL_PT_TIP: + if (decoder->packet.count != 0) + intel_pt_set_last_ip(decoder); + decoder->pkt_state = INTEL_PT_STATE_TIP; + return intel_pt_walk_tip(decoder); + + case INTEL_PT_FUP: + if (decoder->packet.count == 0) { + intel_pt_log_at("Skipping zero FUP", + decoder->pos); + no_tip = false; + break; + } + intel_pt_set_last_ip(decoder); + err = intel_pt_walk_fup(decoder); + if (err != -EAGAIN) { + if (err) + return err; + if (no_tip) + decoder->pkt_state = + INTEL_PT_STATE_FUP_NO_TIP; + else + decoder->pkt_state = INTEL_PT_STATE_FUP; + return 0; + } + if (no_tip) { + no_tip = false; + break; + } + return intel_pt_walk_fup_tip(decoder); + + case INTEL_PT_PSB: + intel_pt_clear_stack(&decoder->stack); + err = intel_pt_walk_psbend(decoder); + if (err == -EAGAIN) + goto next; + if (err) + return err; + break; + + case INTEL_PT_PIP: + decoder->cr3 = decoder->packet.payload; + break; + + case INTEL_PT_TSC: + intel_pt_calc_tsc_timestamp(decoder); + break; + + case INTEL_PT_CBR: + decoder->cbr = decoder->packet.payload; + break; + + case INTEL_PT_MODE_EXEC: + decoder->exec_mode = decoder->packet.payload; + break; + + case INTEL_PT_MODE_TSX: + /* MODE_TSX need not be followed by FUP */ + if (!decoder->pge) { + intel_pt_update_in_tx(decoder); + break; + } + err = intel_pt_mode_tsx(decoder, &no_tip); + if (err) + return err; + goto next; + + case INTEL_PT_BAD: /* Does not happen */ + return intel_pt_bug(decoder); + + case INTEL_PT_PSBEND: + case INTEL_PT_PAD: + break; + + default: + return intel_pt_bug(decoder); + } + } +} + +/* Walk PSB+ packets to get in sync. */ +static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) +{ + int err; + + while (1) { + err = intel_pt_get_next_packet(decoder); + if (err) + return err; + + switch (decoder->packet.type) { + case INTEL_PT_TIP_PGD: + decoder->continuous_period = false; + case INTEL_PT_TIP_PGE: + case INTEL_PT_TIP: + intel_pt_log("ERROR: Unexpected packet\n"); + return -ENOENT; + + case INTEL_PT_FUP: + decoder->pge = true; + if (decoder->last_ip || decoder->packet.count == 6 || + decoder->packet.count == 0) { + uint64_t current_ip = decoder->ip; + + intel_pt_set_ip(decoder); + if (current_ip) + intel_pt_log_to("Setting IP", + decoder->ip); + } + break; + + case INTEL_PT_TSC: + intel_pt_calc_tsc_timestamp(decoder); + break; + + case INTEL_PT_CBR: + decoder->cbr = decoder->packet.payload; + break; + + case INTEL_PT_PIP: + decoder->cr3 = decoder->packet.payload; + break; + + case INTEL_PT_MODE_EXEC: + decoder->exec_mode = decoder->packet.payload; + break; + + case INTEL_PT_MODE_TSX: + intel_pt_update_in_tx(decoder); + break; + + case INTEL_PT_TNT: + intel_pt_log("ERROR: Unexpected packet\n"); + if (decoder->ip) + decoder->pkt_state = INTEL_PT_STATE_ERR4; + else + decoder->pkt_state = INTEL_PT_STATE_ERR3; + return -ENOENT; + + case INTEL_PT_BAD: /* Does not happen */ + return intel_pt_bug(decoder); + + case INTEL_PT_OVF: + return intel_pt_overflow(decoder); + + case INTEL_PT_PSBEND: + return 0; + + case INTEL_PT_PSB: + case INTEL_PT_PAD: + default: + break; + } + } +} + +static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) +{ + int err; + + while (1) { + err = intel_pt_get_next_packet(decoder); + if (err) + return err; + + switch (decoder->packet.type) { + case INTEL_PT_TIP_PGD: + decoder->continuous_period = false; + case INTEL_PT_TIP_PGE: + case INTEL_PT_TIP: + decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD; + if (decoder->last_ip || decoder->packet.count == 6 || + decoder->packet.count == 0) + intel_pt_set_ip(decoder); + if (decoder->ip) + return 0; + break; + + case INTEL_PT_FUP: + if (decoder->overflow) { + if (decoder->last_ip || + decoder->packet.count == 6 || + decoder->packet.count == 0) + intel_pt_set_ip(decoder); + if (decoder->ip) + return 0; + } + if (decoder->packet.count) + intel_pt_set_last_ip(decoder); + break; + + case INTEL_PT_TSC: + intel_pt_calc_tsc_timestamp(decoder); + break; + + case INTEL_PT_CBR: + decoder->cbr = decoder->packet.payload; + break; + + case INTEL_PT_PIP: + decoder->cr3 = decoder->packet.payload; + break; + + case INTEL_PT_MODE_EXEC: + decoder->exec_mode = decoder->packet.payload; + break; + + case INTEL_PT_MODE_TSX: + intel_pt_update_in_tx(decoder); + break; + + case INTEL_PT_OVF: + return intel_pt_overflow(decoder); + + case INTEL_PT_BAD: /* Does not happen */ + return intel_pt_bug(decoder); + + case INTEL_PT_PSB: + err = intel_pt_walk_psb(decoder); + if (err) + return err; + if (decoder->ip) { + /* Do not have a sample */ + decoder->state.type = 0; + return 0; + } + break; + + case INTEL_PT_TNT: + case INTEL_PT_PSBEND: + case INTEL_PT_PAD: + default: + break; + } + } +} + +static int intel_pt_sync_ip(struct intel_pt_decoder *decoder) +{ + int err; + + intel_pt_log("Scanning for full IP\n"); + err = intel_pt_walk_to_ip(decoder); + if (err) + return err; + + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + decoder->overflow = false; + + decoder->state.from_ip = 0; + decoder->state.to_ip = decoder->ip; + intel_pt_log_to("Setting IP", decoder->ip); + + return 0; +} + +static int intel_pt_part_psb(struct intel_pt_decoder *decoder) +{ + const unsigned char *end = decoder->buf + decoder->len; + size_t i; + + for (i = INTEL_PT_PSB_LEN - 1; i; i--) { + if (i > decoder->len) + continue; + if (!memcmp(end - i, INTEL_PT_PSB_STR, i)) + return i; + } + return 0; +} + +static int intel_pt_rest_psb(struct intel_pt_decoder *decoder, int part_psb) +{ + size_t rest_psb = INTEL_PT_PSB_LEN - part_psb; + const char *psb = INTEL_PT_PSB_STR; + + if (rest_psb > decoder->len || + memcmp(decoder->buf, psb + part_psb, rest_psb)) + return 0; + + return rest_psb; +} + +static int intel_pt_get_split_psb(struct intel_pt_decoder *decoder, + int part_psb) +{ + int rest_psb, ret; + + decoder->pos += decoder->len; + decoder->len = 0; + + ret = intel_pt_get_next_data(decoder); + if (ret) + return ret; + + rest_psb = intel_pt_rest_psb(decoder, part_psb); + if (!rest_psb) + return 0; + + decoder->pos -= part_psb; + decoder->next_buf = decoder->buf + rest_psb; + decoder->next_len = decoder->len - rest_psb; + memcpy(decoder->temp_buf, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN); + decoder->buf = decoder->temp_buf; + decoder->len = INTEL_PT_PSB_LEN; + + return 0; +} + +static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder) +{ + unsigned char *next; + int ret; + + intel_pt_log("Scanning for PSB\n"); + while (1) { + if (!decoder->len) { + ret = intel_pt_get_next_data(decoder); + if (ret) + return ret; + } + + next = memmem(decoder->buf, decoder->len, INTEL_PT_PSB_STR, + INTEL_PT_PSB_LEN); + if (!next) { + int part_psb; + + part_psb = intel_pt_part_psb(decoder); + if (part_psb) { + ret = intel_pt_get_split_psb(decoder, part_psb); + if (ret) + return ret; + } else { + decoder->pos += decoder->len; + decoder->len = 0; + } + continue; + } + + decoder->pkt_step = next - decoder->buf; + return intel_pt_get_next_packet(decoder); + } +} + +static int intel_pt_sync(struct intel_pt_decoder *decoder) +{ + int err; + + decoder->pge = false; + decoder->continuous_period = false; + decoder->last_ip = 0; + decoder->ip = 0; + intel_pt_clear_stack(&decoder->stack); + + err = intel_pt_scan_for_psb(decoder); + if (err) + return err; + + decoder->pkt_state = INTEL_PT_STATE_NO_IP; + + err = intel_pt_walk_psb(decoder); + if (err) + return err; + + if (decoder->ip) { + decoder->state.type = 0; /* Do not have a sample */ + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + } else { + return intel_pt_sync_ip(decoder); + } + + return 0; +} + +static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder) +{ + uint64_t est = decoder->timestamp_insn_cnt << 1; + + if (!decoder->cbr || !decoder->max_non_turbo_ratio) + goto out; + + est *= decoder->max_non_turbo_ratio; + est /= decoder->cbr; +out: + return decoder->timestamp + est; +} + +const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) +{ + int err; + + do { + decoder->state.type = INTEL_PT_BRANCH; + decoder->state.flags = 0; + + switch (decoder->pkt_state) { + case INTEL_PT_STATE_NO_PSB: + err = intel_pt_sync(decoder); + break; + case INTEL_PT_STATE_NO_IP: + decoder->last_ip = 0; + /* Fall through */ + case INTEL_PT_STATE_ERR_RESYNC: + err = intel_pt_sync_ip(decoder); + break; + case INTEL_PT_STATE_IN_SYNC: + err = intel_pt_walk_trace(decoder); + break; + case INTEL_PT_STATE_TNT: + err = intel_pt_walk_tnt(decoder); + if (err == -EAGAIN) + err = intel_pt_walk_trace(decoder); + break; + case INTEL_PT_STATE_TIP: + case INTEL_PT_STATE_TIP_PGD: + err = intel_pt_walk_tip(decoder); + break; + case INTEL_PT_STATE_FUP: + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + err = intel_pt_walk_fup(decoder); + if (err == -EAGAIN) + err = intel_pt_walk_fup_tip(decoder); + else if (!err) + decoder->pkt_state = INTEL_PT_STATE_FUP; + break; + case INTEL_PT_STATE_FUP_NO_TIP: + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + err = intel_pt_walk_fup(decoder); + if (err == -EAGAIN) + err = intel_pt_walk_trace(decoder); + break; + default: + err = intel_pt_bug(decoder); + break; + } + } while (err == -ENOLINK); + + decoder->state.err = err ? intel_pt_ext_err(err) : 0; + decoder->state.timestamp = decoder->timestamp; + decoder->state.est_timestamp = intel_pt_est_timestamp(decoder); + decoder->state.cr3 = decoder->cr3; + + if (err) + decoder->state.from_ip = decoder->ip; + + return &decoder->state; +} + +static bool intel_pt_at_psb(unsigned char *buf, size_t len) +{ + if (len < INTEL_PT_PSB_LEN) + return false; + return memmem(buf, INTEL_PT_PSB_LEN, INTEL_PT_PSB_STR, + INTEL_PT_PSB_LEN); +} + +/** + * intel_pt_next_psb - move buffer pointer to the start of the next PSB packet. + * @buf: pointer to buffer pointer + * @len: size of buffer + * + * Updates the buffer pointer to point to the start of the next PSB packet if + * there is one, otherwise the buffer pointer is unchanged. If @buf is updated, + * @len is adjusted accordingly. + * + * Return: %true if a PSB packet is found, %false otherwise. + */ +static bool intel_pt_next_psb(unsigned char **buf, size_t *len) +{ + unsigned char *next; + + next = memmem(*buf, *len, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN); + if (next) { + *len -= next - *buf; + *buf = next; + return true; + } + return false; +} + +/** + * intel_pt_step_psb - move buffer pointer to the start of the following PSB + * packet. + * @buf: pointer to buffer pointer + * @len: size of buffer + * + * Updates the buffer pointer to point to the start of the following PSB packet + * (skipping the PSB at @buf itself) if there is one, otherwise the buffer + * pointer is unchanged. If @buf is updated, @len is adjusted accordingly. + * + * Return: %true if a PSB packet is found, %false otherwise. + */ +static bool intel_pt_step_psb(unsigned char **buf, size_t *len) +{ + unsigned char *next; + + if (!*len) + return false; + + next = memmem(*buf + 1, *len - 1, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN); + if (next) { + *len -= next - *buf; + *buf = next; + return true; + } + return false; +} + +/** + * intel_pt_last_psb - find the last PSB packet in a buffer. + * @buf: buffer + * @len: size of buffer + * + * This function finds the last PSB in a buffer. + * + * Return: A pointer to the last PSB in @buf if found, %NULL otherwise. + */ +static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len) +{ + const char *n = INTEL_PT_PSB_STR; + unsigned char *p; + size_t k; + + if (len < INTEL_PT_PSB_LEN) + return NULL; + + k = len - INTEL_PT_PSB_LEN + 1; + while (1) { + p = memrchr(buf, n[0], k); + if (!p) + return NULL; + if (!memcmp(p + 1, n + 1, INTEL_PT_PSB_LEN - 1)) + return p; + k = p - buf; + if (!k) + return NULL; + } +} + +/** + * intel_pt_next_tsc - find and return next TSC. + * @buf: buffer + * @len: size of buffer + * @tsc: TSC value returned + * + * Find a TSC packet in @buf and return the TSC value. This function assumes + * that @buf starts at a PSB and that PSB+ will contain TSC and so stops if a + * PSBEND packet is found. + * + * Return: %true if TSC is found, false otherwise. + */ +static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc) +{ + struct intel_pt_pkt packet; + int ret; + + while (len) { + ret = intel_pt_get_packet(buf, len, &packet); + if (ret <= 0) + return false; + if (packet.type == INTEL_PT_TSC) { + *tsc = packet.payload; + return true; + } + if (packet.type == INTEL_PT_PSBEND) + return false; + buf += ret; + len -= ret; + } + return false; +} + +/** + * intel_pt_tsc_cmp - compare 7-byte TSCs. + * @tsc1: first TSC to compare + * @tsc2: second TSC to compare + * + * This function compares 7-byte TSC values allowing for the possibility that + * TSC wrapped around. Generally it is not possible to know if TSC has wrapped + * around so for that purpose this function assumes the absolute difference is + * less than half the maximum difference. + * + * Return: %-1 if @tsc1 is before @tsc2, %0 if @tsc1 == @tsc2, %1 if @tsc1 is + * after @tsc2. + */ +static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2) +{ + const uint64_t halfway = (1ULL << 55); + + if (tsc1 == tsc2) + return 0; + + if (tsc1 < tsc2) { + if (tsc2 - tsc1 < halfway) + return -1; + else + return 1; + } else { + if (tsc1 - tsc2 < halfway) + return 1; + else + return -1; + } +} + +/** + * intel_pt_find_overlap_tsc - determine start of non-overlapped trace data + * using TSC. + * @buf_a: first buffer + * @len_a: size of first buffer + * @buf_b: second buffer + * @len_b: size of second buffer + * + * If the trace contains TSC we can look at the last TSC of @buf_a and the + * first TSC of @buf_b in order to determine if the buffers overlap, and then + * walk forward in @buf_b until a later TSC is found. A precondition is that + * @buf_a and @buf_b are positioned at a PSB. + * + * Return: A pointer into @buf_b from where non-overlapped data starts, or + * @buf_b + @len_b if there is no non-overlapped data. + */ +static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a, + size_t len_a, + unsigned char *buf_b, + size_t len_b) +{ + uint64_t tsc_a, tsc_b; + unsigned char *p; + size_t len; + + p = intel_pt_last_psb(buf_a, len_a); + if (!p) + return buf_b; /* No PSB in buf_a => no overlap */ + + len = len_a - (p - buf_a); + if (!intel_pt_next_tsc(p, len, &tsc_a)) { + /* The last PSB+ in buf_a is incomplete, so go back one more */ + len_a -= len; + p = intel_pt_last_psb(buf_a, len_a); + if (!p) + return buf_b; /* No full PSB+ => assume no overlap */ + len = len_a - (p - buf_a); + if (!intel_pt_next_tsc(p, len, &tsc_a)) + return buf_b; /* No TSC in buf_a => assume no overlap */ + } + + while (1) { + /* Ignore PSB+ with no TSC */ + if (intel_pt_next_tsc(buf_b, len_b, &tsc_b) && + intel_pt_tsc_cmp(tsc_a, tsc_b) < 0) + return buf_b; /* tsc_a < tsc_b => no overlap */ + + if (!intel_pt_step_psb(&buf_b, &len_b)) + return buf_b + len_b; /* No PSB in buf_b => no data */ + } +} + +/** + * intel_pt_find_overlap - determine start of non-overlapped trace data. + * @buf_a: first buffer + * @len_a: size of first buffer + * @buf_b: second buffer + * @len_b: size of second buffer + * @have_tsc: can use TSC packets to detect overlap + * + * When trace samples or snapshots are recorded there is the possibility that + * the data overlaps. Note that, for the purposes of decoding, data is only + * useful if it begins with a PSB packet. + * + * Return: A pointer into @buf_b from where non-overlapped data starts, or + * @buf_b + @len_b if there is no non-overlapped data. + */ +unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, + unsigned char *buf_b, size_t len_b, + bool have_tsc) +{ + unsigned char *found; + + /* Buffer 'b' must start at PSB so throw away everything before that */ + if (!intel_pt_next_psb(&buf_b, &len_b)) + return buf_b + len_b; /* No PSB */ + + if (!intel_pt_next_psb(&buf_a, &len_a)) + return buf_b; /* No overlap */ + + if (have_tsc) { + found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b); + if (found) + return found; + } + + /* + * Buffer 'b' cannot end within buffer 'a' so, for comparison purposes, + * we can ignore the first part of buffer 'a'. + */ + while (len_b < len_a) { + if (!intel_pt_step_psb(&buf_a, &len_a)) + return buf_b; /* No overlap */ + } + + /* Now len_b >= len_a */ + if (len_b > len_a) { + /* The leftover buffer 'b' must start at a PSB */ + while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) { + if (!intel_pt_step_psb(&buf_a, &len_a)) + return buf_b; /* No overlap */ + } + } + + while (1) { + /* Potential overlap so check the bytes */ + found = memmem(buf_a, len_a, buf_b, len_a); + if (found) + return buf_b + len_a; + + /* Try again at next PSB in buffer 'a' */ + if (!intel_pt_step_psb(&buf_a, &len_a)) + return buf_b; /* No overlap */ + + /* The leftover buffer 'b' must start at a PSB */ + while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) { + if (!intel_pt_step_psb(&buf_a, &len_a)) + return buf_b; /* No overlap */ + } + } +} diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h new file mode 100644 index 000000000000..4c4880230cc9 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -0,0 +1,104 @@ +/* + * intel_pt_decoder.h: Intel Processor Trace support + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef INCLUDE__INTEL_PT_DECODER_H__ +#define INCLUDE__INTEL_PT_DECODER_H__ + +#include <stdint.h> +#include <stddef.h> +#include <stdbool.h> + +#include "intel-pt-insn-decoder.h" + +#define INTEL_PT_IN_TX (1 << 0) +#define INTEL_PT_ABORT_TX (1 << 1) +#define INTEL_PT_ASYNC (1 << 2) + +enum intel_pt_sample_type { + INTEL_PT_BRANCH = 1 << 0, + INTEL_PT_INSTRUCTION = 1 << 1, + INTEL_PT_TRANSACTION = 1 << 2, +}; + +enum intel_pt_period_type { + INTEL_PT_PERIOD_NONE, + INTEL_PT_PERIOD_INSTRUCTIONS, + INTEL_PT_PERIOD_TICKS, +}; + +enum { + INTEL_PT_ERR_NOMEM = 1, + INTEL_PT_ERR_INTERN, + INTEL_PT_ERR_BADPKT, + INTEL_PT_ERR_NODATA, + INTEL_PT_ERR_NOINSN, + INTEL_PT_ERR_MISMAT, + INTEL_PT_ERR_OVR, + INTEL_PT_ERR_LOST, + INTEL_PT_ERR_UNK, + INTEL_PT_ERR_NELOOP, + INTEL_PT_ERR_MAX, +}; + +struct intel_pt_state { + enum intel_pt_sample_type type; + int err; + uint64_t from_ip; + uint64_t to_ip; + uint64_t cr3; + uint64_t timestamp; + uint64_t est_timestamp; + uint64_t trace_nr; + uint32_t flags; + enum intel_pt_insn_op insn_op; + int insn_len; +}; + +struct intel_pt_insn; + +struct intel_pt_buffer { + const unsigned char *buf; + size_t len; + bool consecutive; + uint64_t ref_timestamp; + uint64_t trace_nr; +}; + +struct intel_pt_params { + int (*get_trace)(struct intel_pt_buffer *buffer, void *data); + int (*walk_insn)(struct intel_pt_insn *intel_pt_insn, + uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, + uint64_t max_insn_cnt, void *data); + void *data; + bool return_compression; + uint64_t period; + enum intel_pt_period_type period_type; + unsigned max_non_turbo_ratio; +}; + +struct intel_pt_decoder; + +struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params); +void intel_pt_decoder_free(struct intel_pt_decoder *decoder); + +const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder); + +unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, + unsigned char *buf_b, size_t len_b, + bool have_tsc); + +int intel_pt__strerror(int code, char *buf, size_t buflen); + +#endif diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c new file mode 100644 index 000000000000..46980fc663ac --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -0,0 +1,246 @@ +/* + * intel_pt_insn_decoder.c: Intel Processor Trace support + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include <stdio.h> +#include <string.h> +#include <endian.h> +#include <byteswap.h> + +#include "event.h" + +#include <asm/insn.h> + +#include "inat.c" +#include "insn.c" + +#include "intel-pt-insn-decoder.h" + +/* Based on branch_type() from perf_event_intel_lbr.c */ +static void intel_pt_insn_decoder(struct insn *insn, + struct intel_pt_insn *intel_pt_insn) +{ + enum intel_pt_insn_op op = INTEL_PT_OP_OTHER; + enum intel_pt_insn_branch branch = INTEL_PT_BR_NO_BRANCH; + int ext; + + if (insn_is_avx(insn)) { + intel_pt_insn->op = INTEL_PT_OP_OTHER; + intel_pt_insn->branch = INTEL_PT_BR_NO_BRANCH; + intel_pt_insn->length = insn->length; + return; + } + + switch (insn->opcode.bytes[0]) { + case 0xf: + switch (insn->opcode.bytes[1]) { + case 0x05: /* syscall */ + case 0x34: /* sysenter */ + op = INTEL_PT_OP_SYSCALL; + branch = INTEL_PT_BR_INDIRECT; + break; + case 0x07: /* sysret */ + case 0x35: /* sysexit */ + op = INTEL_PT_OP_SYSRET; + branch = INTEL_PT_BR_INDIRECT; + break; + case 0x80 ... 0x8f: /* jcc */ + op = INTEL_PT_OP_JCC; + branch = INTEL_PT_BR_CONDITIONAL; + break; + default: + break; + } + break; + case 0x70 ... 0x7f: /* jcc */ + op = INTEL_PT_OP_JCC; + branch = INTEL_PT_BR_CONDITIONAL; + break; + case 0xc2: /* near ret */ + case 0xc3: /* near ret */ + case 0xca: /* far ret */ + case 0xcb: /* far ret */ + op = INTEL_PT_OP_RET; + branch = INTEL_PT_BR_INDIRECT; + break; + case 0xcf: /* iret */ + op = INTEL_PT_OP_IRET; + branch = INTEL_PT_BR_INDIRECT; + break; + case 0xcc ... 0xce: /* int */ + op = INTEL_PT_OP_INT; + branch = INTEL_PT_BR_INDIRECT; + break; + case 0xe8: /* call near rel */ + op = INTEL_PT_OP_CALL; + branch = INTEL_PT_BR_UNCONDITIONAL; + break; + case 0x9a: /* call far absolute */ + op = INTEL_PT_OP_CALL; + branch = INTEL_PT_BR_INDIRECT; + break; + case 0xe0 ... 0xe2: /* loop */ + op = INTEL_PT_OP_LOOP; + branch = INTEL_PT_BR_CONDITIONAL; + break; + case 0xe3: /* jcc */ + op = INTEL_PT_OP_JCC; + branch = INTEL_PT_BR_CONDITIONAL; + break; + case 0xe9: /* jmp */ + case 0xeb: /* jmp */ + op = INTEL_PT_OP_JMP; + branch = INTEL_PT_BR_UNCONDITIONAL; + break; + case 0xea: /* far jmp */ + op = INTEL_PT_OP_JMP; + branch = INTEL_PT_BR_INDIRECT; + break; + case 0xff: /* call near absolute, call far absolute ind */ + ext = (insn->modrm.bytes[0] >> 3) & 0x7; + switch (ext) { + case 2: /* near ind call */ + case 3: /* far ind call */ + op = INTEL_PT_OP_CALL; + branch = INTEL_PT_BR_INDIRECT; + break; + case 4: + case 5: + op = INTEL_PT_OP_JMP; + branch = INTEL_PT_BR_INDIRECT; + break; + default: + break; + } + break; + default: + break; + } + + intel_pt_insn->op = op; + intel_pt_insn->branch = branch; + intel_pt_insn->length = insn->length; + + if (branch == INTEL_PT_BR_CONDITIONAL || + branch == INTEL_PT_BR_UNCONDITIONAL) { +#if __BYTE_ORDER == __BIG_ENDIAN + switch (insn->immediate.nbytes) { + case 1: + intel_pt_insn->rel = insn->immediate.value; + break; + case 2: + intel_pt_insn->rel = + bswap_16((short)insn->immediate.value); + break; + case 4: + intel_pt_insn->rel = bswap_32(insn->immediate.value); + break; + } +#else + intel_pt_insn->rel = insn->immediate.value; +#endif + } +} + +int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64, + struct intel_pt_insn *intel_pt_insn) +{ + struct insn insn; + + insn_init(&insn, buf, len, x86_64); + insn_get_length(&insn); + if (!insn_complete(&insn) || insn.length > len) + return -1; + intel_pt_insn_decoder(&insn, intel_pt_insn); + if (insn.length < INTEL_PT_INSN_DBG_BUF_SZ) + memcpy(intel_pt_insn->buf, buf, insn.length); + else + memcpy(intel_pt_insn->buf, buf, INTEL_PT_INSN_DBG_BUF_SZ); + return 0; +} + +const char *branch_name[] = { + [INTEL_PT_OP_OTHER] = "Other", + [INTEL_PT_OP_CALL] = "Call", + [INTEL_PT_OP_RET] = "Ret", + [INTEL_PT_OP_JCC] = "Jcc", + [INTEL_PT_OP_JMP] = "Jmp", + [INTEL_PT_OP_LOOP] = "Loop", + [INTEL_PT_OP_IRET] = "IRet", + [INTEL_PT_OP_INT] = "Int", + [INTEL_PT_OP_SYSCALL] = "Syscall", + [INTEL_PT_OP_SYSRET] = "Sysret", +}; + +const char *intel_pt_insn_name(enum intel_pt_insn_op op) +{ + return branch_name[op]; +} + +int intel_pt_insn_desc(const struct intel_pt_insn *intel_pt_insn, char *buf, + size_t buf_len) +{ + switch (intel_pt_insn->branch) { + case INTEL_PT_BR_CONDITIONAL: + case INTEL_PT_BR_UNCONDITIONAL: + return snprintf(buf, buf_len, "%s %s%d", + intel_pt_insn_name(intel_pt_insn->op), + intel_pt_insn->rel > 0 ? "+" : "", + intel_pt_insn->rel); + case INTEL_PT_BR_NO_BRANCH: + case INTEL_PT_BR_INDIRECT: + return snprintf(buf, buf_len, "%s", + intel_pt_insn_name(intel_pt_insn->op)); + default: + break; + } + return 0; +} + +size_t intel_pt_insn_max_size(void) +{ + return MAX_INSN_SIZE; +} + +int intel_pt_insn_type(enum intel_pt_insn_op op) +{ + switch (op) { + case INTEL_PT_OP_OTHER: + return 0; + case INTEL_PT_OP_CALL: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL; + case INTEL_PT_OP_RET: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN; + case INTEL_PT_OP_JCC: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL; + case INTEL_PT_OP_JMP: + return PERF_IP_FLAG_BRANCH; + case INTEL_PT_OP_LOOP: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL; + case INTEL_PT_OP_IRET: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | + PERF_IP_FLAG_INTERRUPT; + case INTEL_PT_OP_INT: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | + PERF_IP_FLAG_INTERRUPT; + case INTEL_PT_OP_SYSCALL: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | + PERF_IP_FLAG_SYSCALLRET; + case INTEL_PT_OP_SYSRET: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | + PERF_IP_FLAG_SYSCALLRET; + default: + return 0; + } +} diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h new file mode 100644 index 000000000000..b0adbf37323e --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h @@ -0,0 +1,65 @@ +/* + * intel_pt_insn_decoder.h: Intel Processor Trace support + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef INCLUDE__INTEL_PT_INSN_DECODER_H__ +#define INCLUDE__INTEL_PT_INSN_DECODER_H__ + +#include <stddef.h> +#include <stdint.h> + +#define INTEL_PT_INSN_DESC_MAX 32 +#define INTEL_PT_INSN_DBG_BUF_SZ 16 + +enum intel_pt_insn_op { + INTEL_PT_OP_OTHER, + INTEL_PT_OP_CALL, + INTEL_PT_OP_RET, + INTEL_PT_OP_JCC, + INTEL_PT_OP_JMP, + INTEL_PT_OP_LOOP, + INTEL_PT_OP_IRET, + INTEL_PT_OP_INT, + INTEL_PT_OP_SYSCALL, + INTEL_PT_OP_SYSRET, +}; + +enum intel_pt_insn_branch { + INTEL_PT_BR_NO_BRANCH, + INTEL_PT_BR_INDIRECT, + INTEL_PT_BR_CONDITIONAL, + INTEL_PT_BR_UNCONDITIONAL, +}; + +struct intel_pt_insn { + enum intel_pt_insn_op op; + enum intel_pt_insn_branch branch; + int length; + int32_t rel; + unsigned char buf[INTEL_PT_INSN_DBG_BUF_SZ]; +}; + +int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64, + struct intel_pt_insn *intel_pt_insn); + +const char *intel_pt_insn_name(enum intel_pt_insn_op op); + +int intel_pt_insn_desc(const struct intel_pt_insn *intel_pt_insn, char *buf, + size_t buf_len); + +size_t intel_pt_insn_max_size(void); + +int intel_pt_insn_type(enum intel_pt_insn_op op); + +#endif diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.c b/tools/perf/util/intel-pt-decoder/intel-pt-log.c new file mode 100644 index 000000000000..d09c7d9f9050 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.c @@ -0,0 +1,155 @@ +/* + * intel_pt_log.c: Intel Processor Trace support + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include <stdio.h> +#include <stdint.h> +#include <inttypes.h> +#include <stdarg.h> +#include <stdbool.h> +#include <string.h> + +#include "intel-pt-log.h" +#include "intel-pt-insn-decoder.h" + +#include "intel-pt-pkt-decoder.h" + +#define MAX_LOG_NAME 256 + +static FILE *f; +static char log_name[MAX_LOG_NAME]; +static bool enable_logging; + +void intel_pt_log_enable(void) +{ + enable_logging = true; +} + +void intel_pt_log_disable(void) +{ + if (f) + fflush(f); + enable_logging = false; +} + +void intel_pt_log_set_name(const char *name) +{ + strncpy(log_name, name, MAX_LOG_NAME - 5); + strcat(log_name, ".log"); +} + +static void intel_pt_print_data(const unsigned char *buf, int len, uint64_t pos, + int indent) +{ + int i; + + for (i = 0; i < indent; i++) + fprintf(f, " "); + + fprintf(f, " %08" PRIx64 ": ", pos); + for (i = 0; i < len; i++) + fprintf(f, " %02x", buf[i]); + for (; i < 16; i++) + fprintf(f, " "); + fprintf(f, " "); +} + +static void intel_pt_print_no_data(uint64_t pos, int indent) +{ + int i; + + for (i = 0; i < indent; i++) + fprintf(f, " "); + + fprintf(f, " %08" PRIx64 ": ", pos); + for (i = 0; i < 16; i++) + fprintf(f, " "); + fprintf(f, " "); +} + +static int intel_pt_log_open(void) +{ + if (!enable_logging) + return -1; + + if (f) + return 0; + + if (!log_name[0]) + return -1; + + f = fopen(log_name, "w+"); + if (!f) { + enable_logging = false; + return -1; + } + + return 0; +} + +void intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len, + uint64_t pos, const unsigned char *buf) +{ + char desc[INTEL_PT_PKT_DESC_MAX]; + + if (intel_pt_log_open()) + return; + + intel_pt_print_data(buf, pkt_len, pos, 0); + intel_pt_pkt_desc(packet, desc, INTEL_PT_PKT_DESC_MAX); + fprintf(f, "%s\n", desc); +} + +void intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip) +{ + char desc[INTEL_PT_INSN_DESC_MAX]; + size_t len = intel_pt_insn->length; + + if (intel_pt_log_open()) + return; + + if (len > INTEL_PT_INSN_DBG_BUF_SZ) + len = INTEL_PT_INSN_DBG_BUF_SZ; + intel_pt_print_data(intel_pt_insn->buf, len, ip, 8); + if (intel_pt_insn_desc(intel_pt_insn, desc, INTEL_PT_INSN_DESC_MAX) > 0) + fprintf(f, "%s\n", desc); + else + fprintf(f, "Bad instruction!\n"); +} + +void intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, uint64_t ip) +{ + char desc[INTEL_PT_INSN_DESC_MAX]; + + if (intel_pt_log_open()) + return; + + intel_pt_print_no_data(ip, 8); + if (intel_pt_insn_desc(intel_pt_insn, desc, INTEL_PT_INSN_DESC_MAX) > 0) + fprintf(f, "%s\n", desc); + else + fprintf(f, "Bad instruction!\n"); +} + +void intel_pt_log(const char *fmt, ...) +{ + va_list args; + + if (intel_pt_log_open()) + return; + + va_start(args, fmt); + vfprintf(f, fmt, args); + va_end(args); +} diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h new file mode 100644 index 000000000000..db3942f83677 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h @@ -0,0 +1,52 @@ +/* + * intel_pt_log.h: Intel Processor Trace support + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef INCLUDE__INTEL_PT_LOG_H__ +#define INCLUDE__INTEL_PT_LOG_H__ + +#include <stdint.h> +#include <inttypes.h> + +struct intel_pt_pkt; + +void intel_pt_log_enable(void); +void intel_pt_log_disable(void); +void intel_pt_log_set_name(const char *name); + +void intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len, + uint64_t pos, const unsigned char *buf); + +struct intel_pt_insn; + +void intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip); +void intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, + uint64_t ip); + +__attribute__((format(printf, 1, 2))) +void intel_pt_log(const char *fmt, ...); + +#define x64_fmt "0x%" PRIx64 + +static inline void intel_pt_log_at(const char *msg, uint64_t u) +{ + intel_pt_log("%s at " x64_fmt "\n", msg, u); +} + +static inline void intel_pt_log_to(const char *msg, uint64_t u) +{ + intel_pt_log("%s to " x64_fmt "\n", msg, u); +} + +#endif diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c new file mode 100644 index 000000000000..988c82c6652d --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c @@ -0,0 +1,400 @@ +/* + * intel_pt_pkt_decoder.c: Intel Processor Trace support + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include <stdio.h> +#include <string.h> +#include <endian.h> +#include <byteswap.h> + +#include "intel-pt-pkt-decoder.h" + +#define BIT(n) (1 << (n)) + +#define BIT63 ((uint64_t)1 << 63) + +#if __BYTE_ORDER == __BIG_ENDIAN +#define le16_to_cpu bswap_16 +#define le32_to_cpu bswap_32 +#define le64_to_cpu bswap_64 +#define memcpy_le64(d, s, n) do { \ + memcpy((d), (s), (n)); \ + *(d) = le64_to_cpu(*(d)); \ +} while (0) +#else +#define le16_to_cpu +#define le32_to_cpu +#define le64_to_cpu +#define memcpy_le64 memcpy +#endif + +static const char * const packet_name[] = { + [INTEL_PT_BAD] = "Bad Packet!", + [INTEL_PT_PAD] = "PAD", + [INTEL_PT_TNT] = "TNT", + [INTEL_PT_TIP_PGD] = "TIP.PGD", + [INTEL_PT_TIP_PGE] = "TIP.PGE", + [INTEL_PT_TSC] = "TSC", + [INTEL_PT_MODE_EXEC] = "MODE.Exec", + [INTEL_PT_MODE_TSX] = "MODE.TSX", + [INTEL_PT_TIP] = "TIP", + [INTEL_PT_FUP] = "FUP", + [INTEL_PT_PSB] = "PSB", + [INTEL_PT_PSBEND] = "PSBEND", + [INTEL_PT_CBR] = "CBR", + [INTEL_PT_PIP] = "PIP", + [INTEL_PT_OVF] = "OVF", +}; + +const char *intel_pt_pkt_name(enum intel_pt_pkt_type type) +{ + return packet_name[type]; +} + +static int intel_pt_get_long_tnt(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + uint64_t payload; + int count; + + if (len < 8) + return INTEL_PT_NEED_MORE_BYTES; + + payload = le64_to_cpu(*(uint64_t *)buf); + + for (count = 47; count; count--) { + if (payload & BIT63) + break; + payload <<= 1; + } + + packet->type = INTEL_PT_TNT; + packet->count = count; + packet->payload = payload << 1; + return 8; +} + +static int intel_pt_get_pip(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + uint64_t payload = 0; + + if (len < 8) + return INTEL_PT_NEED_MORE_BYTES; + + packet->type = INTEL_PT_PIP; + memcpy_le64(&payload, buf + 2, 6); + packet->payload = payload >> 1; + + return 8; +} + +static int intel_pt_get_cbr(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 4) + return INTEL_PT_NEED_MORE_BYTES; + packet->type = INTEL_PT_CBR; + packet->payload = buf[2]; + return 4; +} + +static int intel_pt_get_ovf(struct intel_pt_pkt *packet) +{ + packet->type = INTEL_PT_OVF; + return 2; +} + +static int intel_pt_get_psb(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + int i; + + if (len < 16) + return INTEL_PT_NEED_MORE_BYTES; + + for (i = 2; i < 16; i += 2) { + if (buf[i] != 2 || buf[i + 1] != 0x82) + return INTEL_PT_BAD_PACKET; + } + + packet->type = INTEL_PT_PSB; + return 16; +} + +static int intel_pt_get_psbend(struct intel_pt_pkt *packet) +{ + packet->type = INTEL_PT_PSBEND; + return 2; +} + +static int intel_pt_get_pad(struct intel_pt_pkt *packet) +{ + packet->type = INTEL_PT_PAD; + return 1; +} + +static int intel_pt_get_ext(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 2) + return INTEL_PT_NEED_MORE_BYTES; + + switch (buf[1]) { + case 0xa3: /* Long TNT */ + return intel_pt_get_long_tnt(buf, len, packet); + case 0x43: /* PIP */ + return intel_pt_get_pip(buf, len, packet); + case 0x03: /* CBR */ + return intel_pt_get_cbr(buf, len, packet); + case 0xf3: /* OVF */ + return intel_pt_get_ovf(packet); + case 0x82: /* PSB */ + return intel_pt_get_psb(buf, len, packet); + case 0x23: /* PSBEND */ + return intel_pt_get_psbend(packet); + default: + return INTEL_PT_BAD_PACKET; + } +} + +static int intel_pt_get_short_tnt(unsigned int byte, + struct intel_pt_pkt *packet) +{ + int count; + + for (count = 6; count; count--) { + if (byte & BIT(7)) + break; + byte <<= 1; + } + + packet->type = INTEL_PT_TNT; + packet->count = count; + packet->payload = (uint64_t)byte << 57; + + return 1; +} + +static int intel_pt_get_ip(enum intel_pt_pkt_type type, unsigned int byte, + const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + switch (byte >> 5) { + case 0: + packet->count = 0; + break; + case 1: + if (len < 3) + return INTEL_PT_NEED_MORE_BYTES; + packet->count = 2; + packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1)); + break; + case 2: + if (len < 5) + return INTEL_PT_NEED_MORE_BYTES; + packet->count = 4; + packet->payload = le32_to_cpu(*(uint32_t *)(buf + 1)); + break; + case 3: + case 6: + if (len < 7) + return INTEL_PT_NEED_MORE_BYTES; + packet->count = 6; + memcpy_le64(&packet->payload, buf + 1, 6); + break; + default: + return INTEL_PT_BAD_PACKET; + } + + packet->type = type; + + return packet->count + 1; +} + +static int intel_pt_get_mode(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 2) + return INTEL_PT_NEED_MORE_BYTES; + + switch (buf[1] >> 5) { + case 0: + packet->type = INTEL_PT_MODE_EXEC; + switch (buf[1] & 3) { + case 0: + packet->payload = 16; + break; + case 1: + packet->payload = 64; + break; + case 2: + packet->payload = 32; + break; + default: + return INTEL_PT_BAD_PACKET; + } + break; + case 1: + packet->type = INTEL_PT_MODE_TSX; + if ((buf[1] & 3) == 3) + return INTEL_PT_BAD_PACKET; + packet->payload = buf[1] & 3; + break; + default: + return INTEL_PT_BAD_PACKET; + } + + return 2; +} + +static int intel_pt_get_tsc(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 8) + return INTEL_PT_NEED_MORE_BYTES; + packet->type = INTEL_PT_TSC; + memcpy_le64(&packet->payload, buf + 1, 7); + return 8; +} + +static int intel_pt_do_get_packet(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + unsigned int byte; + + memset(packet, 0, sizeof(struct intel_pt_pkt)); + + if (!len) + return INTEL_PT_NEED_MORE_BYTES; + + byte = buf[0]; + if (!(byte & BIT(0))) { + if (byte == 0) + return intel_pt_get_pad(packet); + if (byte == 2) + return intel_pt_get_ext(buf, len, packet); + return intel_pt_get_short_tnt(byte, packet); + } + + switch (byte & 0x1f) { + case 0x0D: + return intel_pt_get_ip(INTEL_PT_TIP, byte, buf, len, packet); + case 0x11: + return intel_pt_get_ip(INTEL_PT_TIP_PGE, byte, buf, len, + packet); + case 0x01: + return intel_pt_get_ip(INTEL_PT_TIP_PGD, byte, buf, len, + packet); + case 0x1D: + return intel_pt_get_ip(INTEL_PT_FUP, byte, buf, len, packet); + case 0x19: + switch (byte) { + case 0x99: + return intel_pt_get_mode(buf, len, packet); + case 0x19: + return intel_pt_get_tsc(buf, len, packet); + default: + return INTEL_PT_BAD_PACKET; + } + default: + return INTEL_PT_BAD_PACKET; + } +} + +int intel_pt_get_packet(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + int ret; + + ret = intel_pt_do_get_packet(buf, len, packet); + if (ret > 0) { + while (ret < 8 && len > (size_t)ret && !buf[ret]) + ret += 1; + } + return ret; +} + +int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, + size_t buf_len) +{ + int ret, i; + unsigned long long payload = packet->payload; + const char *name = intel_pt_pkt_name(packet->type); + + switch (packet->type) { + case INTEL_PT_BAD: + case INTEL_PT_PAD: + case INTEL_PT_PSB: + case INTEL_PT_PSBEND: + case INTEL_PT_OVF: + return snprintf(buf, buf_len, "%s", name); + case INTEL_PT_TNT: { + size_t blen = buf_len; + + ret = snprintf(buf, blen, "%s ", name); + if (ret < 0) + return ret; + buf += ret; + blen -= ret; + for (i = 0; i < packet->count; i++) { + if (payload & BIT63) + ret = snprintf(buf, blen, "T"); + else + ret = snprintf(buf, blen, "N"); + if (ret < 0) + return ret; + buf += ret; + blen -= ret; + payload <<= 1; + } + ret = snprintf(buf, blen, " (%d)", packet->count); + if (ret < 0) + return ret; + blen -= ret; + return buf_len - blen; + } + case INTEL_PT_TIP_PGD: + case INTEL_PT_TIP_PGE: + case INTEL_PT_TIP: + case INTEL_PT_FUP: + if (!(packet->count)) + return snprintf(buf, buf_len, "%s no ip", name); + case INTEL_PT_CBR: + return snprintf(buf, buf_len, "%s 0x%llx", name, payload); + case INTEL_PT_TSC: + if (packet->count) + return snprintf(buf, buf_len, + "%s 0x%llx CTC 0x%x FC 0x%x", + name, payload, packet->count & 0xffff, + (packet->count >> 16) & 0x1ff); + else + return snprintf(buf, buf_len, "%s 0x%llx", + name, payload); + case INTEL_PT_MODE_EXEC: + return snprintf(buf, buf_len, "%s %lld", name, payload); + case INTEL_PT_MODE_TSX: + return snprintf(buf, buf_len, "%s TXAbort:%u InTX:%u", + name, (unsigned)(payload >> 1) & 1, + (unsigned)payload & 1); + case INTEL_PT_PIP: + ret = snprintf(buf, buf_len, "%s 0x%llx", + name, payload); + return ret; + default: + break; + } + return snprintf(buf, buf_len, "%s 0x%llx (%d)", + name, payload, packet->count); +} diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h new file mode 100644 index 000000000000..53404fa942b3 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h @@ -0,0 +1,64 @@ +/* + * intel_pt_pkt_decoder.h: Intel Processor Trace support + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef INCLUDE__INTEL_PT_PKT_DECODER_H__ +#define INCLUDE__INTEL_PT_PKT_DECODER_H__ + +#include <stddef.h> +#include <stdint.h> + +#define INTEL_PT_PKT_DESC_MAX 256 + +#define INTEL_PT_NEED_MORE_BYTES -1 +#define INTEL_PT_BAD_PACKET -2 + +#define INTEL_PT_PSB_STR "\002\202\002\202\002\202\002\202" \ + "\002\202\002\202\002\202\002\202" +#define INTEL_PT_PSB_LEN 16 + +#define INTEL_PT_PKT_MAX_SZ 16 + +enum intel_pt_pkt_type { + INTEL_PT_BAD, + INTEL_PT_PAD, + INTEL_PT_TNT, + INTEL_PT_TIP_PGD, + INTEL_PT_TIP_PGE, + INTEL_PT_TSC, + INTEL_PT_MODE_EXEC, + INTEL_PT_MODE_TSX, + INTEL_PT_TIP, + INTEL_PT_FUP, + INTEL_PT_PSB, + INTEL_PT_PSBEND, + INTEL_PT_CBR, + INTEL_PT_PIP, + INTEL_PT_OVF, +}; + +struct intel_pt_pkt { + enum intel_pt_pkt_type type; + int count; + uint64_t payload; +}; + +const char *intel_pt_pkt_name(enum intel_pt_pkt_type); + +int intel_pt_get_packet(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet); + +int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, size_t len); + +#endif diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt new file mode 100644 index 000000000000..816488c0b97e --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt @@ -0,0 +1,970 @@ +# x86 Opcode Maps +# +# This is (mostly) based on following documentations. +# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2C +# (#326018-047US, June 2013) +# +#<Opcode maps> +# Table: table-name +# Referrer: escaped-name +# AVXcode: avx-code +# opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] +# (or) +# opcode: escape # escaped-name +# EndTable +# +#<group maps> +# GrpTable: GrpXXX +# reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] +# EndTable +# +# AVX Superscripts +# (v): this opcode requires VEX prefix. +# (v1): this opcode only supports 128bit VEX. +# +# Last Prefix Superscripts +# - (66): the last prefix is 0x66 +# - (F3): the last prefix is 0xF3 +# - (F2): the last prefix is 0xF2 +# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case) +# - (66&F2): Both 0x66 and 0xF2 prefixes are specified. + +Table: one byte opcode +Referrer: +AVXcode: +# 0x00 - 0x0f +00: ADD Eb,Gb +01: ADD Ev,Gv +02: ADD Gb,Eb +03: ADD Gv,Ev +04: ADD AL,Ib +05: ADD rAX,Iz +06: PUSH ES (i64) +07: POP ES (i64) +08: OR Eb,Gb +09: OR Ev,Gv +0a: OR Gb,Eb +0b: OR Gv,Ev +0c: OR AL,Ib +0d: OR rAX,Iz +0e: PUSH CS (i64) +0f: escape # 2-byte escape +# 0x10 - 0x1f +10: ADC Eb,Gb +11: ADC Ev,Gv +12: ADC Gb,Eb +13: ADC Gv,Ev +14: ADC AL,Ib +15: ADC rAX,Iz +16: PUSH SS (i64) +17: POP SS (i64) +18: SBB Eb,Gb +19: SBB Ev,Gv +1a: SBB Gb,Eb +1b: SBB Gv,Ev +1c: SBB AL,Ib +1d: SBB rAX,Iz +1e: PUSH DS (i64) +1f: POP DS (i64) +# 0x20 - 0x2f +20: AND Eb,Gb +21: AND Ev,Gv +22: AND Gb,Eb +23: AND Gv,Ev +24: AND AL,Ib +25: AND rAx,Iz +26: SEG=ES (Prefix) +27: DAA (i64) +28: SUB Eb,Gb +29: SUB Ev,Gv +2a: SUB Gb,Eb +2b: SUB Gv,Ev +2c: SUB AL,Ib +2d: SUB rAX,Iz +2e: SEG=CS (Prefix) +2f: DAS (i64) +# 0x30 - 0x3f +30: XOR Eb,Gb +31: XOR Ev,Gv +32: XOR Gb,Eb +33: XOR Gv,Ev +34: XOR AL,Ib +35: XOR rAX,Iz +36: SEG=SS (Prefix) +37: AAA (i64) +38: CMP Eb,Gb +39: CMP Ev,Gv +3a: CMP Gb,Eb +3b: CMP Gv,Ev +3c: CMP AL,Ib +3d: CMP rAX,Iz +3e: SEG=DS (Prefix) +3f: AAS (i64) +# 0x40 - 0x4f +40: INC eAX (i64) | REX (o64) +41: INC eCX (i64) | REX.B (o64) +42: INC eDX (i64) | REX.X (o64) +43: INC eBX (i64) | REX.XB (o64) +44: INC eSP (i64) | REX.R (o64) +45: INC eBP (i64) | REX.RB (o64) +46: INC eSI (i64) | REX.RX (o64) +47: INC eDI (i64) | REX.RXB (o64) +48: DEC eAX (i64) | REX.W (o64) +49: DEC eCX (i64) | REX.WB (o64) +4a: DEC eDX (i64) | REX.WX (o64) +4b: DEC eBX (i64) | REX.WXB (o64) +4c: DEC eSP (i64) | REX.WR (o64) +4d: DEC eBP (i64) | REX.WRB (o64) +4e: DEC eSI (i64) | REX.WRX (o64) +4f: DEC eDI (i64) | REX.WRXB (o64) +# 0x50 - 0x5f +50: PUSH rAX/r8 (d64) +51: PUSH rCX/r9 (d64) +52: PUSH rDX/r10 (d64) +53: PUSH rBX/r11 (d64) +54: PUSH rSP/r12 (d64) +55: PUSH rBP/r13 (d64) +56: PUSH rSI/r14 (d64) +57: PUSH rDI/r15 (d64) +58: POP rAX/r8 (d64) +59: POP rCX/r9 (d64) +5a: POP rDX/r10 (d64) +5b: POP rBX/r11 (d64) +5c: POP rSP/r12 (d64) +5d: POP rBP/r13 (d64) +5e: POP rSI/r14 (d64) +5f: POP rDI/r15 (d64) +# 0x60 - 0x6f +60: PUSHA/PUSHAD (i64) +61: POPA/POPAD (i64) +62: BOUND Gv,Ma (i64) +63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64) +64: SEG=FS (Prefix) +65: SEG=GS (Prefix) +66: Operand-Size (Prefix) +67: Address-Size (Prefix) +68: PUSH Iz (d64) +69: IMUL Gv,Ev,Iz +6a: PUSH Ib (d64) +6b: IMUL Gv,Ev,Ib +6c: INS/INSB Yb,DX +6d: INS/INSW/INSD Yz,DX +6e: OUTS/OUTSB DX,Xb +6f: OUTS/OUTSW/OUTSD DX,Xz +# 0x70 - 0x7f +70: JO Jb +71: JNO Jb +72: JB/JNAE/JC Jb +73: JNB/JAE/JNC Jb +74: JZ/JE Jb +75: JNZ/JNE Jb +76: JBE/JNA Jb +77: JNBE/JA Jb +78: JS Jb +79: JNS Jb +7a: JP/JPE Jb +7b: JNP/JPO Jb +7c: JL/JNGE Jb +7d: JNL/JGE Jb +7e: JLE/JNG Jb +7f: JNLE/JG Jb +# 0x80 - 0x8f +80: Grp1 Eb,Ib (1A) +81: Grp1 Ev,Iz (1A) +82: Grp1 Eb,Ib (1A),(i64) +83: Grp1 Ev,Ib (1A) +84: TEST Eb,Gb +85: TEST Ev,Gv +86: XCHG Eb,Gb +87: XCHG Ev,Gv +88: MOV Eb,Gb +89: MOV Ev,Gv +8a: MOV Gb,Eb +8b: MOV Gv,Ev +8c: MOV Ev,Sw +8d: LEA Gv,M +8e: MOV Sw,Ew +8f: Grp1A (1A) | POP Ev (d64) +# 0x90 - 0x9f +90: NOP | PAUSE (F3) | XCHG r8,rAX +91: XCHG rCX/r9,rAX +92: XCHG rDX/r10,rAX +93: XCHG rBX/r11,rAX +94: XCHG rSP/r12,rAX +95: XCHG rBP/r13,rAX +96: XCHG rSI/r14,rAX +97: XCHG rDI/r15,rAX +98: CBW/CWDE/CDQE +99: CWD/CDQ/CQO +9a: CALLF Ap (i64) +9b: FWAIT/WAIT +9c: PUSHF/D/Q Fv (d64) +9d: POPF/D/Q Fv (d64) +9e: SAHF +9f: LAHF +# 0xa0 - 0xaf +a0: MOV AL,Ob +a1: MOV rAX,Ov +a2: MOV Ob,AL +a3: MOV Ov,rAX +a4: MOVS/B Yb,Xb +a5: MOVS/W/D/Q Yv,Xv +a6: CMPS/B Xb,Yb +a7: CMPS/W/D Xv,Yv +a8: TEST AL,Ib +a9: TEST rAX,Iz +aa: STOS/B Yb,AL +ab: STOS/W/D/Q Yv,rAX +ac: LODS/B AL,Xb +ad: LODS/W/D/Q rAX,Xv +ae: SCAS/B AL,Yb +# Note: The May 2011 Intel manual shows Xv for the second parameter of the +# next instruction but Yv is correct +af: SCAS/W/D/Q rAX,Yv +# 0xb0 - 0xbf +b0: MOV AL/R8L,Ib +b1: MOV CL/R9L,Ib +b2: MOV DL/R10L,Ib +b3: MOV BL/R11L,Ib +b4: MOV AH/R12L,Ib +b5: MOV CH/R13L,Ib +b6: MOV DH/R14L,Ib +b7: MOV BH/R15L,Ib +b8: MOV rAX/r8,Iv +b9: MOV rCX/r9,Iv +ba: MOV rDX/r10,Iv +bb: MOV rBX/r11,Iv +bc: MOV rSP/r12,Iv +bd: MOV rBP/r13,Iv +be: MOV rSI/r14,Iv +bf: MOV rDI/r15,Iv +# 0xc0 - 0xcf +c0: Grp2 Eb,Ib (1A) +c1: Grp2 Ev,Ib (1A) +c2: RETN Iw (f64) +c3: RETN +c4: LES Gz,Mp (i64) | VEX+2byte (Prefix) +c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix) +c6: Grp11A Eb,Ib (1A) +c7: Grp11B Ev,Iz (1A) +c8: ENTER Iw,Ib +c9: LEAVE (d64) +ca: RETF Iw +cb: RETF +cc: INT3 +cd: INT Ib +ce: INTO (i64) +cf: IRET/D/Q +# 0xd0 - 0xdf +d0: Grp2 Eb,1 (1A) +d1: Grp2 Ev,1 (1A) +d2: Grp2 Eb,CL (1A) +d3: Grp2 Ev,CL (1A) +d4: AAM Ib (i64) +d5: AAD Ib (i64) +d6: +d7: XLAT/XLATB +d8: ESC +d9: ESC +da: ESC +db: ESC +dc: ESC +dd: ESC +de: ESC +df: ESC +# 0xe0 - 0xef +# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix +# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation +# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD. +e0: LOOPNE/LOOPNZ Jb (f64) +e1: LOOPE/LOOPZ Jb (f64) +e2: LOOP Jb (f64) +e3: JrCXZ Jb (f64) +e4: IN AL,Ib +e5: IN eAX,Ib +e6: OUT Ib,AL +e7: OUT Ib,eAX +# With 0x66 prefix in 64-bit mode, for AMD CPUs immediate offset +# in "near" jumps and calls is 16-bit. For CALL, +# push of return address is 16-bit wide, RSP is decremented by 2 +# but is not truncated to 16 bits, unlike RIP. +e8: CALL Jz (f64) +e9: JMP-near Jz (f64) +ea: JMP-far Ap (i64) +eb: JMP-short Jb (f64) +ec: IN AL,DX +ed: IN eAX,DX +ee: OUT DX,AL +ef: OUT DX,eAX +# 0xf0 - 0xff +f0: LOCK (Prefix) +f1: +f2: REPNE (Prefix) | XACQUIRE (Prefix) +f3: REP/REPE (Prefix) | XRELEASE (Prefix) +f4: HLT +f5: CMC +f6: Grp3_1 Eb (1A) +f7: Grp3_2 Ev (1A) +f8: CLC +f9: STC +fa: CLI +fb: STI +fc: CLD +fd: STD +fe: Grp4 (1A) +ff: Grp5 (1A) +EndTable + +Table: 2-byte opcode (0x0f) +Referrer: 2-byte escape +AVXcode: 1 +# 0x0f 0x00-0x0f +00: Grp6 (1A) +01: Grp7 (1A) +02: LAR Gv,Ew +03: LSL Gv,Ew +04: +05: SYSCALL (o64) +06: CLTS +07: SYSRET (o64) +08: INVD +09: WBINVD +0a: +0b: UD2 (1B) +0c: +# AMD's prefetch group. Intel supports prefetchw(/1) only. +0d: GrpP +0e: FEMMS +# 3DNow! uses the last imm byte as opcode extension. +0f: 3DNow! Pq,Qq,Ib +# 0x0f 0x10-0x1f +# NOTE: According to Intel SDM opcode map, vmovups and vmovupd has no operands +# but it actually has operands. And also, vmovss and vmovsd only accept 128bit. +# MOVSS/MOVSD has too many forms(3) on SDM. This map just shows a typical form. +# Many AVX instructions lack v1 superscript, according to Intel AVX-Prgramming +# Reference A.1 +10: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1) +11: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1) +12: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2) +13: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1) +14: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66) +15: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66) +16: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3) +17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1) +18: Grp16 (1A) +19: +1a: BNDCL Ev,Gv | BNDCU Ev,Gv | BNDMOV Gv,Ev | BNDLDX Gv,Ev,Gv +1b: BNDCN Ev,Gv | BNDMOV Ev,Gv | BNDMK Gv,Ev | BNDSTX Ev,GV,Gv +1c: +1d: +1e: +1f: NOP Ev +# 0x0f 0x20-0x2f +20: MOV Rd,Cd +21: MOV Rd,Dd +22: MOV Cd,Rd +23: MOV Dd,Rd +24: +25: +26: +27: +28: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66) +29: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66) +2a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1) +2b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66) +2c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1) +2d: cvtps2pi Ppi,Wps | cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1) +2e: vucomiss Vss,Wss (v1) | vucomisd Vsd,Wsd (66),(v1) +2f: vcomiss Vss,Wss (v1) | vcomisd Vsd,Wsd (66),(v1) +# 0x0f 0x30-0x3f +30: WRMSR +31: RDTSC +32: RDMSR +33: RDPMC +34: SYSENTER +35: SYSEXIT +36: +37: GETSEC +38: escape # 3-byte escape 1 +39: +3a: escape # 3-byte escape 2 +3b: +3c: +3d: +3e: +3f: +# 0x0f 0x40-0x4f +40: CMOVO Gv,Ev +41: CMOVNO Gv,Ev +42: CMOVB/C/NAE Gv,Ev +43: CMOVAE/NB/NC Gv,Ev +44: CMOVE/Z Gv,Ev +45: CMOVNE/NZ Gv,Ev +46: CMOVBE/NA Gv,Ev +47: CMOVA/NBE Gv,Ev +48: CMOVS Gv,Ev +49: CMOVNS Gv,Ev +4a: CMOVP/PE Gv,Ev +4b: CMOVNP/PO Gv,Ev +4c: CMOVL/NGE Gv,Ev +4d: CMOVNL/GE Gv,Ev +4e: CMOVLE/NG Gv,Ev +4f: CMOVNLE/G Gv,Ev +# 0x0f 0x50-0x5f +50: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66) +51: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1) +52: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1) +53: vrcpps Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1) +54: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66) +55: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66) +56: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66) +57: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66) +58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1) +59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1) +5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1) +5b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3) +5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1) +5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1) +5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1) +5f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1) +# 0x0f 0x60-0x6f +60: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1) +61: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1) +62: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1) +63: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1) +64: pcmpgtb Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1) +65: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1) +66: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1) +67: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1) +68: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1) +69: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1) +6a: punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1) +6b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1) +6c: vpunpcklqdq Vx,Hx,Wx (66),(v1) +6d: vpunpckhqdq Vx,Hx,Wx (66),(v1) +6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1) +6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3) +# 0x0f 0x70-0x7f +70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1) +71: Grp12 (1A) +72: Grp13 (1A) +73: Grp14 (1A) +74: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1) +75: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1) +76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1) +# Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX. +77: emms | vzeroupper | vzeroall +78: VMREAD Ey,Gy +79: VMWRITE Gy,Ey +7a: +7b: +7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2) +7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2) +7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1) +7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3) +# 0x0f 0x80-0x8f +# Note: "forced64" is Intel CPU behavior (see comment about CALL insn). +80: JO Jz (f64) +81: JNO Jz (f64) +82: JB/JC/JNAE Jz (f64) +83: JAE/JNB/JNC Jz (f64) +84: JE/JZ Jz (f64) +85: JNE/JNZ Jz (f64) +86: JBE/JNA Jz (f64) +87: JA/JNBE Jz (f64) +88: JS Jz (f64) +89: JNS Jz (f64) +8a: JP/JPE Jz (f64) +8b: JNP/JPO Jz (f64) +8c: JL/JNGE Jz (f64) +8d: JNL/JGE Jz (f64) +8e: JLE/JNG Jz (f64) +8f: JNLE/JG Jz (f64) +# 0x0f 0x90-0x9f +90: SETO Eb +91: SETNO Eb +92: SETB/C/NAE Eb +93: SETAE/NB/NC Eb +94: SETE/Z Eb +95: SETNE/NZ Eb +96: SETBE/NA Eb +97: SETA/NBE Eb +98: SETS Eb +99: SETNS Eb +9a: SETP/PE Eb +9b: SETNP/PO Eb +9c: SETL/NGE Eb +9d: SETNL/GE Eb +9e: SETLE/NG Eb +9f: SETNLE/G Eb +# 0x0f 0xa0-0xaf +a0: PUSH FS (d64) +a1: POP FS (d64) +a2: CPUID +a3: BT Ev,Gv +a4: SHLD Ev,Gv,Ib +a5: SHLD Ev,Gv,CL +a6: GrpPDLK +a7: GrpRNG +a8: PUSH GS (d64) +a9: POP GS (d64) +aa: RSM +ab: BTS Ev,Gv +ac: SHRD Ev,Gv,Ib +ad: SHRD Ev,Gv,CL +ae: Grp15 (1A),(1C) +af: IMUL Gv,Ev +# 0x0f 0xb0-0xbf +b0: CMPXCHG Eb,Gb +b1: CMPXCHG Ev,Gv +b2: LSS Gv,Mp +b3: BTR Ev,Gv +b4: LFS Gv,Mp +b5: LGS Gv,Mp +b6: MOVZX Gv,Eb +b7: MOVZX Gv,Ew +b8: JMPE (!F3) | POPCNT Gv,Ev (F3) +b9: Grp10 (1A) +ba: Grp8 Ev,Ib (1A) +bb: BTC Ev,Gv +bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3) +bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3) +be: MOVSX Gv,Eb +bf: MOVSX Gv,Ew +# 0x0f 0xc0-0xcf +c0: XADD Eb,Gb +c1: XADD Ev,Gv +c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1) +c3: movnti My,Gy +c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1) +c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1) +c6: vshufps Vps,Hps,Wps,Ib | vshufpd Vpd,Hpd,Wpd,Ib (66) +c7: Grp9 (1A) +c8: BSWAP RAX/EAX/R8/R8D +c9: BSWAP RCX/ECX/R9/R9D +ca: BSWAP RDX/EDX/R10/R10D +cb: BSWAP RBX/EBX/R11/R11D +cc: BSWAP RSP/ESP/R12/R12D +cd: BSWAP RBP/EBP/R13/R13D +ce: BSWAP RSI/ESI/R14/R14D +cf: BSWAP RDI/EDI/R15/R15D +# 0x0f 0xd0-0xdf +d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2) +d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1) +d2: psrld Pq,Qq | vpsrld Vx,Hx,Wx (66),(v1) +d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1) +d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1) +d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1) +d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) +d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1) +d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1) +d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1) +da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1) +db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) +dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1) +dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1) +de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1) +df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) +# 0x0f 0xe0-0xef +e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1) +e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1) +e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1) +e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1) +e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1) +e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1) +e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2) +e7: movntq Mq,Pq | vmovntdq Mx,Vx (66) +e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1) +e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1) +ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1) +eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) +ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1) +ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1) +ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1) +ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) +# 0x0f 0xf0-0xff +f0: vlddqu Vx,Mx (F2) +f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1) +f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1) +f3: psllq Pq,Qq | vpsllq Vx,Hx,Wx (66),(v1) +f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1) +f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1) +f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1) +f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1) +f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1) +f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1) +fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1) +fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1) +fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1) +fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1) +fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1) +ff: +EndTable + +Table: 3-byte opcode 1 (0x0f 0x38) +Referrer: 3-byte escape 1 +AVXcode: 2 +# 0x0f 0x38 0x00-0x0f +00: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1) +01: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1) +02: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1) +03: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx (66),(v1) +04: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1) +05: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1) +06: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1) +07: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1) +08: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1) +09: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1) +0a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1) +0b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1) +0c: vpermilps Vx,Hx,Wx (66),(v) +0d: vpermilpd Vx,Hx,Wx (66),(v) +0e: vtestps Vx,Wx (66),(v) +0f: vtestpd Vx,Wx (66),(v) +# 0x0f 0x38 0x10-0x1f +10: pblendvb Vdq,Wdq (66) +11: +12: +13: vcvtph2ps Vx,Wx,Ib (66),(v) +14: blendvps Vdq,Wdq (66) +15: blendvpd Vdq,Wdq (66) +16: vpermps Vqq,Hqq,Wqq (66),(v) +17: vptest Vx,Wx (66) +18: vbroadcastss Vx,Wd (66),(v) +19: vbroadcastsd Vqq,Wq (66),(v) +1a: vbroadcastf128 Vqq,Mdq (66),(v) +1b: +1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1) +1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1) +1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1) +1f: +# 0x0f 0x38 0x20-0x2f +20: vpmovsxbw Vx,Ux/Mq (66),(v1) +21: vpmovsxbd Vx,Ux/Md (66),(v1) +22: vpmovsxbq Vx,Ux/Mw (66),(v1) +23: vpmovsxwd Vx,Ux/Mq (66),(v1) +24: vpmovsxwq Vx,Ux/Md (66),(v1) +25: vpmovsxdq Vx,Ux/Mq (66),(v1) +26: +27: +28: vpmuldq Vx,Hx,Wx (66),(v1) +29: vpcmpeqq Vx,Hx,Wx (66),(v1) +2a: vmovntdqa Vx,Mx (66),(v1) +2b: vpackusdw Vx,Hx,Wx (66),(v1) +2c: vmaskmovps Vx,Hx,Mx (66),(v) +2d: vmaskmovpd Vx,Hx,Mx (66),(v) +2e: vmaskmovps Mx,Hx,Vx (66),(v) +2f: vmaskmovpd Mx,Hx,Vx (66),(v) +# 0x0f 0x38 0x30-0x3f +30: vpmovzxbw Vx,Ux/Mq (66),(v1) +31: vpmovzxbd Vx,Ux/Md (66),(v1) +32: vpmovzxbq Vx,Ux/Mw (66),(v1) +33: vpmovzxwd Vx,Ux/Mq (66),(v1) +34: vpmovzxwq Vx,Ux/Md (66),(v1) +35: vpmovzxdq Vx,Ux/Mq (66),(v1) +36: vpermd Vqq,Hqq,Wqq (66),(v) +37: vpcmpgtq Vx,Hx,Wx (66),(v1) +38: vpminsb Vx,Hx,Wx (66),(v1) +39: vpminsd Vx,Hx,Wx (66),(v1) +3a: vpminuw Vx,Hx,Wx (66),(v1) +3b: vpminud Vx,Hx,Wx (66),(v1) +3c: vpmaxsb Vx,Hx,Wx (66),(v1) +3d: vpmaxsd Vx,Hx,Wx (66),(v1) +3e: vpmaxuw Vx,Hx,Wx (66),(v1) +3f: vpmaxud Vx,Hx,Wx (66),(v1) +# 0x0f 0x38 0x40-0x8f +40: vpmulld Vx,Hx,Wx (66),(v1) +41: vphminposuw Vdq,Wdq (66),(v1) +42: +43: +44: +45: vpsrlvd/q Vx,Hx,Wx (66),(v) +46: vpsravd Vx,Hx,Wx (66),(v) +47: vpsllvd/q Vx,Hx,Wx (66),(v) +# Skip 0x48-0x57 +58: vpbroadcastd Vx,Wx (66),(v) +59: vpbroadcastq Vx,Wx (66),(v) +5a: vbroadcasti128 Vqq,Mdq (66),(v) +# Skip 0x5b-0x77 +78: vpbroadcastb Vx,Wx (66),(v) +79: vpbroadcastw Vx,Wx (66),(v) +# Skip 0x7a-0x7f +80: INVEPT Gy,Mdq (66) +81: INVPID Gy,Mdq (66) +82: INVPCID Gy,Mdq (66) +8c: vpmaskmovd/q Vx,Hx,Mx (66),(v) +8e: vpmaskmovd/q Mx,Vx,Hx (66),(v) +# 0x0f 0x38 0x90-0xbf (FMA) +90: vgatherdd/q Vx,Hx,Wx (66),(v) +91: vgatherqd/q Vx,Hx,Wx (66),(v) +92: vgatherdps/d Vx,Hx,Wx (66),(v) +93: vgatherqps/d Vx,Hx,Wx (66),(v) +94: +95: +96: vfmaddsub132ps/d Vx,Hx,Wx (66),(v) +97: vfmsubadd132ps/d Vx,Hx,Wx (66),(v) +98: vfmadd132ps/d Vx,Hx,Wx (66),(v) +99: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1) +9a: vfmsub132ps/d Vx,Hx,Wx (66),(v) +9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1) +9c: vfnmadd132ps/d Vx,Hx,Wx (66),(v) +9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1) +9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v) +9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1) +a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v) +a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v) +a8: vfmadd213ps/d Vx,Hx,Wx (66),(v) +a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1) +aa: vfmsub213ps/d Vx,Hx,Wx (66),(v) +ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1) +ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v) +ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1) +ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v) +af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1) +b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v) +b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v) +b8: vfmadd231ps/d Vx,Hx,Wx (66),(v) +b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1) +ba: vfmsub231ps/d Vx,Hx,Wx (66),(v) +bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1) +bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v) +bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1) +be: vfnmsub231ps/d Vx,Hx,Wx (66),(v) +bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1) +# 0x0f 0x38 0xc0-0xff +db: VAESIMC Vdq,Wdq (66),(v1) +dc: VAESENC Vdq,Hdq,Wdq (66),(v1) +dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) +de: VAESDEC Vdq,Hdq,Wdq (66),(v1) +df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1) +f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2) +f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2) +f2: ANDN Gy,By,Ey (v) +f3: Grp17 (1A) +f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) +f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) +f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) +EndTable + +Table: 3-byte opcode 2 (0x0f 0x3a) +Referrer: 3-byte escape 2 +AVXcode: 3 +# 0x0f 0x3a 0x00-0xff +00: vpermq Vqq,Wqq,Ib (66),(v) +01: vpermpd Vqq,Wqq,Ib (66),(v) +02: vpblendd Vx,Hx,Wx,Ib (66),(v) +03: +04: vpermilps Vx,Wx,Ib (66),(v) +05: vpermilpd Vx,Wx,Ib (66),(v) +06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v) +07: +08: vroundps Vx,Wx,Ib (66) +09: vroundpd Vx,Wx,Ib (66) +0a: vroundss Vss,Wss,Ib (66),(v1) +0b: vroundsd Vsd,Wsd,Ib (66),(v1) +0c: vblendps Vx,Hx,Wx,Ib (66) +0d: vblendpd Vx,Hx,Wx,Ib (66) +0e: vpblendw Vx,Hx,Wx,Ib (66),(v1) +0f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1) +14: vpextrb Rd/Mb,Vdq,Ib (66),(v1) +15: vpextrw Rd/Mw,Vdq,Ib (66),(v1) +16: vpextrd/q Ey,Vdq,Ib (66),(v1) +17: vextractps Ed,Vdq,Ib (66),(v1) +18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) +19: vextractf128 Wdq,Vqq,Ib (66),(v) +1d: vcvtps2ph Wx,Vx,Ib (66),(v) +20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1) +21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1) +22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1) +38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) +39: vextracti128 Wdq,Vqq,Ib (66),(v) +40: vdpps Vx,Hx,Wx,Ib (66) +41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1) +42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) +44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1) +46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v) +4a: vblendvps Vx,Hx,Wx,Lx (66),(v) +4b: vblendvpd Vx,Hx,Wx,Lx (66),(v) +4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1) +60: vpcmpestrm Vdq,Wdq,Ib (66),(v1) +61: vpcmpestri Vdq,Wdq,Ib (66),(v1) +62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) +63: vpcmpistri Vdq,Wdq,Ib (66),(v1) +df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) +f0: RORX Gy,Ey,Ib (F2),(v) +EndTable + +GrpTable: Grp1 +0: ADD +1: OR +2: ADC +3: SBB +4: AND +5: SUB +6: XOR +7: CMP +EndTable + +GrpTable: Grp1A +0: POP +EndTable + +GrpTable: Grp2 +0: ROL +1: ROR +2: RCL +3: RCR +4: SHL/SAL +5: SHR +6: +7: SAR +EndTable + +GrpTable: Grp3_1 +0: TEST Eb,Ib +1: +2: NOT Eb +3: NEG Eb +4: MUL AL,Eb +5: IMUL AL,Eb +6: DIV AL,Eb +7: IDIV AL,Eb +EndTable + +GrpTable: Grp3_2 +0: TEST Ev,Iz +1: +2: NOT Ev +3: NEG Ev +4: MUL rAX,Ev +5: IMUL rAX,Ev +6: DIV rAX,Ev +7: IDIV rAX,Ev +EndTable + +GrpTable: Grp4 +0: INC Eb +1: DEC Eb +EndTable + +GrpTable: Grp5 +0: INC Ev +1: DEC Ev +# Note: "forced64" is Intel CPU behavior (see comment about CALL insn). +2: CALLN Ev (f64) +3: CALLF Ep +4: JMPN Ev (f64) +5: JMPF Mp +6: PUSH Ev (d64) +7: +EndTable + +GrpTable: Grp6 +0: SLDT Rv/Mw +1: STR Rv/Mw +2: LLDT Ew +3: LTR Ew +4: VERR Ew +5: VERW Ew +EndTable + +GrpTable: Grp7 +0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) +1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) +2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) +3: LIDT Ms +4: SMSW Mw/Rv +5: +6: LMSW Ew +7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) +EndTable + +GrpTable: Grp8 +4: BT +5: BTS +6: BTR +7: BTC +EndTable + +GrpTable: Grp9 +1: CMPXCHG8B/16B Mq/Mdq +6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) +7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B) +EndTable + +GrpTable: Grp10 +EndTable + +# Grp11A and Grp11B are expressed as Grp11 in Intel SDM +GrpTable: Grp11A +0: MOV Eb,Ib +7: XABORT Ib (000),(11B) +EndTable + +GrpTable: Grp11B +0: MOV Eb,Iz +7: XBEGIN Jz (000),(11B) +EndTable + +GrpTable: Grp12 +2: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1) +4: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1) +6: psllw Nq,Ib (11B) | vpsllw Hx,Ux,Ib (66),(11B),(v1) +EndTable + +GrpTable: Grp13 +2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1) +4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) +6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1) +EndTable + +GrpTable: Grp14 +2: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1) +3: vpsrldq Hx,Ux,Ib (66),(11B),(v1) +6: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1) +7: vpslldq Hx,Ux,Ib (66),(11B),(v1) +EndTable + +GrpTable: Grp15 +0: fxsave | RDFSBASE Ry (F3),(11B) +1: fxstor | RDGSBASE Ry (F3),(11B) +2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B) +3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) +4: XSAVE +5: XRSTOR | lfence (11B) +6: XSAVEOPT | mfence (11B) +7: clflush | sfence (11B) +EndTable + +GrpTable: Grp16 +0: prefetch NTA +1: prefetch T0 +2: prefetch T1 +3: prefetch T2 +EndTable + +GrpTable: Grp17 +1: BLSR By,Ey (v) +2: BLSMSK By,Ey (v) +3: BLSI By,Ey (v) +EndTable + +# AMD's Prefetch Group +GrpTable: GrpP +0: PREFETCH +1: PREFETCHW +EndTable + +GrpTable: GrpPDLK +0: MONTMUL +1: XSHA1 +2: XSHA2 +EndTable + +GrpTable: GrpRNG +0: xstore-rng +1: xcrypt-ecb +2: xcrypt-cbc +4: xcrypt-cfb +5: xcrypt-ofb +EndTable diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c new file mode 100644 index 000000000000..2a4a4120473b --- /dev/null +++ b/tools/perf/util/intel-pt.c @@ -0,0 +1,1911 @@ +/* + * intel_pt.c: Intel Processor Trace support + * Copyright (c) 2013-2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include <stdio.h> +#include <stdbool.h> +#include <errno.h> +#include <linux/kernel.h> +#include <linux/types.h> + +#include "../perf.h" +#include "session.h" +#include "machine.h" +#include "tool.h" +#include "event.h" +#include "evlist.h" +#include "evsel.h" +#include "map.h" +#include "color.h" +#include "util.h" +#include "thread.h" +#include "thread-stack.h" +#include "symbol.h" +#include "callchain.h" +#include "dso.h" +#include "debug.h" +#include "auxtrace.h" +#include "tsc.h" +#include "intel-pt.h" + +#include "intel-pt-decoder/intel-pt-log.h" +#include "intel-pt-decoder/intel-pt-decoder.h" +#include "intel-pt-decoder/intel-pt-insn-decoder.h" +#include "intel-pt-decoder/intel-pt-pkt-decoder.h" + +#define MAX_TIMESTAMP (~0ULL) + +struct intel_pt { + struct auxtrace auxtrace; + struct auxtrace_queues queues; + struct auxtrace_heap heap; + u32 auxtrace_type; + struct perf_session *session; + struct machine *machine; + struct perf_evsel *switch_evsel; + struct thread *unknown_thread; + bool timeless_decoding; + bool sampling_mode; + bool snapshot_mode; + bool per_cpu_mmaps; + bool have_tsc; + bool data_queued; + bool est_tsc; + bool sync_switch; + int have_sched_switch; + u32 pmu_type; + u64 kernel_start; + u64 switch_ip; + u64 ptss_ip; + + struct perf_tsc_conversion tc; + bool cap_user_time_zero; + + struct itrace_synth_opts synth_opts; + + bool sample_instructions; + u64 instructions_sample_type; + u64 instructions_sample_period; + u64 instructions_id; + + bool sample_branches; + u32 branches_filter; + u64 branches_sample_type; + u64 branches_id; + + bool sample_transactions; + u64 transactions_sample_type; + u64 transactions_id; + + bool synth_needs_swap; + + u64 tsc_bit; + u64 noretcomp_bit; + unsigned max_non_turbo_ratio; +}; + +enum switch_state { + INTEL_PT_SS_NOT_TRACING, + INTEL_PT_SS_UNKNOWN, + INTEL_PT_SS_TRACING, + INTEL_PT_SS_EXPECTING_SWITCH_EVENT, + INTEL_PT_SS_EXPECTING_SWITCH_IP, +}; + +struct intel_pt_queue { + struct intel_pt *pt; + unsigned int queue_nr; + struct auxtrace_buffer *buffer; + void *decoder; + const struct intel_pt_state *state; + struct ip_callchain *chain; + union perf_event *event_buf; + bool on_heap; + bool stop; + bool step_through_buffers; + bool use_buffer_pid_tid; + pid_t pid, tid; + int cpu; + int switch_state; + pid_t next_tid; + struct thread *thread; + bool exclude_kernel; + bool have_sample; + u64 time; + u64 timestamp; + u32 flags; + u16 insn_len; +}; + +static void intel_pt_dump(struct intel_pt *pt __maybe_unused, + unsigned char *buf, size_t len) +{ + struct intel_pt_pkt packet; + size_t pos = 0; + int ret, pkt_len, i; + char desc[INTEL_PT_PKT_DESC_MAX]; + const char *color = PERF_COLOR_BLUE; + + color_fprintf(stdout, color, + ". ... Intel Processor Trace data: size %zu bytes\n", + len); + + while (len) { + ret = intel_pt_get_packet(buf, len, &packet); + if (ret > 0) + pkt_len = ret; + else + pkt_len = 1; + printf("."); + color_fprintf(stdout, color, " %08x: ", pos); + for (i = 0; i < pkt_len; i++) + color_fprintf(stdout, color, " %02x", buf[i]); + for (; i < 16; i++) + color_fprintf(stdout, color, " "); + if (ret > 0) { + ret = intel_pt_pkt_desc(&packet, desc, + INTEL_PT_PKT_DESC_MAX); + if (ret > 0) + color_fprintf(stdout, color, " %s\n", desc); + } else { + color_fprintf(stdout, color, " Bad packet!\n"); + } + pos += pkt_len; + buf += pkt_len; + len -= pkt_len; + } +} + +static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf, + size_t len) +{ + printf(".\n"); + intel_pt_dump(pt, buf, len); +} + +static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a, + struct auxtrace_buffer *b) +{ + void *start; + + start = intel_pt_find_overlap(a->data, a->size, b->data, b->size, + pt->have_tsc); + if (!start) + return -EINVAL; + b->use_size = b->data + b->size - start; + b->use_data = start; + return 0; +} + +static void intel_pt_use_buffer_pid_tid(struct intel_pt_queue *ptq, + struct auxtrace_queue *queue, + struct auxtrace_buffer *buffer) +{ + if (queue->cpu == -1 && buffer->cpu != -1) + ptq->cpu = buffer->cpu; + + ptq->pid = buffer->pid; + ptq->tid = buffer->tid; + + intel_pt_log("queue %u cpu %d pid %d tid %d\n", + ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid); + + thread__zput(ptq->thread); + + if (ptq->tid != -1) { + if (ptq->pid != -1) + ptq->thread = machine__findnew_thread(ptq->pt->machine, + ptq->pid, + ptq->tid); + else + ptq->thread = machine__find_thread(ptq->pt->machine, -1, + ptq->tid); + } +} + +/* This function assumes data is processed sequentially only */ +static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data) +{ + struct intel_pt_queue *ptq = data; + struct auxtrace_buffer *buffer = ptq->buffer, *old_buffer = buffer; + struct auxtrace_queue *queue; + + if (ptq->stop) { + b->len = 0; + return 0; + } + + queue = &ptq->pt->queues.queue_array[ptq->queue_nr]; + + buffer = auxtrace_buffer__next(queue, buffer); + if (!buffer) { + if (old_buffer) + auxtrace_buffer__drop_data(old_buffer); + b->len = 0; + return 0; + } + + ptq->buffer = buffer; + + if (!buffer->data) { + int fd = perf_data_file__fd(ptq->pt->session->file); + + buffer->data = auxtrace_buffer__get_data(buffer, fd); + if (!buffer->data) + return -ENOMEM; + } + + if (ptq->pt->snapshot_mode && !buffer->consecutive && old_buffer && + intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer)) + return -ENOMEM; + + if (old_buffer) + auxtrace_buffer__drop_data(old_buffer); + + if (buffer->use_data) { + b->len = buffer->use_size; + b->buf = buffer->use_data; + } else { + b->len = buffer->size; + b->buf = buffer->data; + } + b->ref_timestamp = buffer->reference; + + if (!old_buffer || ptq->pt->sampling_mode || (ptq->pt->snapshot_mode && + !buffer->consecutive)) { + b->consecutive = false; + b->trace_nr = buffer->buffer_nr + 1; + } else { + b->consecutive = true; + } + + if (ptq->use_buffer_pid_tid && (ptq->pid != buffer->pid || + ptq->tid != buffer->tid)) + intel_pt_use_buffer_pid_tid(ptq, queue, buffer); + + if (ptq->step_through_buffers) + ptq->stop = true; + + if (!b->len) + return intel_pt_get_trace(b, data); + + return 0; +} + +struct intel_pt_cache_entry { + struct auxtrace_cache_entry entry; + u64 insn_cnt; + u64 byte_cnt; + enum intel_pt_insn_op op; + enum intel_pt_insn_branch branch; + int length; + int32_t rel; +}; + +static int intel_pt_config_div(const char *var, const char *value, void *data) +{ + int *d = data; + long val; + + if (!strcmp(var, "intel-pt.cache-divisor")) { + val = strtol(value, NULL, 0); + if (val > 0 && val <= INT_MAX) + *d = val; + } + + return 0; +} + +static int intel_pt_cache_divisor(void) +{ + static int d; + + if (d) + return d; + + perf_config(intel_pt_config_div, &d); + + if (!d) + d = 64; + + return d; +} + +static unsigned int intel_pt_cache_size(struct dso *dso, + struct machine *machine) +{ + off_t size; + + size = dso__data_size(dso, machine); + size /= intel_pt_cache_divisor(); + if (size < 1000) + return 10; + if (size > (1 << 21)) + return 21; + return 32 - __builtin_clz(size); +} + +static struct auxtrace_cache *intel_pt_cache(struct dso *dso, + struct machine *machine) +{ + struct auxtrace_cache *c; + unsigned int bits; + + if (dso->auxtrace_cache) + return dso->auxtrace_cache; + + bits = intel_pt_cache_size(dso, machine); + + /* Ignoring cache creation failure */ + c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200); + + dso->auxtrace_cache = c; + + return c; +} + +static int intel_pt_cache_add(struct dso *dso, struct machine *machine, + u64 offset, u64 insn_cnt, u64 byte_cnt, + struct intel_pt_insn *intel_pt_insn) +{ + struct auxtrace_cache *c = intel_pt_cache(dso, machine); + struct intel_pt_cache_entry *e; + int err; + + if (!c) + return -ENOMEM; + + e = auxtrace_cache__alloc_entry(c); + if (!e) + return -ENOMEM; + + e->insn_cnt = insn_cnt; + e->byte_cnt = byte_cnt; + e->op = intel_pt_insn->op; + e->branch = intel_pt_insn->branch; + e->length = intel_pt_insn->length; + e->rel = intel_pt_insn->rel; + + err = auxtrace_cache__add(c, offset, &e->entry); + if (err) + auxtrace_cache__free_entry(c, e); + + return err; +} + +static struct intel_pt_cache_entry * +intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset) +{ + struct auxtrace_cache *c = intel_pt_cache(dso, machine); + + if (!c) + return NULL; + + return auxtrace_cache__lookup(dso->auxtrace_cache, offset); +} + +static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, + uint64_t *insn_cnt_ptr, uint64_t *ip, + uint64_t to_ip, uint64_t max_insn_cnt, + void *data) +{ + struct intel_pt_queue *ptq = data; + struct machine *machine = ptq->pt->machine; + struct thread *thread; + struct addr_location al; + unsigned char buf[1024]; + size_t bufsz; + ssize_t len; + int x86_64; + u8 cpumode; + u64 offset, start_offset, start_ip; + u64 insn_cnt = 0; + bool one_map = true; + + if (to_ip && *ip == to_ip) + goto out_no_cache; + + bufsz = intel_pt_insn_max_size(); + + if (*ip >= ptq->pt->kernel_start) + cpumode = PERF_RECORD_MISC_KERNEL; + else + cpumode = PERF_RECORD_MISC_USER; + + thread = ptq->thread; + if (!thread) { + if (cpumode != PERF_RECORD_MISC_KERNEL) + return -EINVAL; + thread = ptq->pt->unknown_thread; + } + + while (1) { + thread__find_addr_map(thread, cpumode, MAP__FUNCTION, *ip, &al); + if (!al.map || !al.map->dso) + return -EINVAL; + + if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR && + dso__data_status_seen(al.map->dso, + DSO_DATA_STATUS_SEEN_ITRACE)) + return -ENOENT; + + offset = al.map->map_ip(al.map, *ip); + + if (!to_ip && one_map) { + struct intel_pt_cache_entry *e; + + e = intel_pt_cache_lookup(al.map->dso, machine, offset); + if (e && + (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) { + *insn_cnt_ptr = e->insn_cnt; + *ip += e->byte_cnt; + intel_pt_insn->op = e->op; + intel_pt_insn->branch = e->branch; + intel_pt_insn->length = e->length; + intel_pt_insn->rel = e->rel; + intel_pt_log_insn_no_data(intel_pt_insn, *ip); + return 0; + } + } + + start_offset = offset; + start_ip = *ip; + + /* Load maps to ensure dso->is_64_bit has been updated */ + map__load(al.map, machine->symbol_filter); + + x86_64 = al.map->dso->is_64_bit; + + while (1) { + len = dso__data_read_offset(al.map->dso, machine, + offset, buf, bufsz); + if (len <= 0) + return -EINVAL; + + if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn)) + return -EINVAL; + + intel_pt_log_insn(intel_pt_insn, *ip); + + insn_cnt += 1; + + if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH) + goto out; + + if (max_insn_cnt && insn_cnt >= max_insn_cnt) + goto out_no_cache; + + *ip += intel_pt_insn->length; + + if (to_ip && *ip == to_ip) + goto out_no_cache; + + if (*ip >= al.map->end) + break; + + offset += intel_pt_insn->length; + } + one_map = false; + } +out: + *insn_cnt_ptr = insn_cnt; + + if (!one_map) + goto out_no_cache; + + /* + * Didn't lookup in the 'to_ip' case, so do it now to prevent duplicate + * entries. + */ + if (to_ip) { + struct intel_pt_cache_entry *e; + + e = intel_pt_cache_lookup(al.map->dso, machine, start_offset); + if (e) + return 0; + } + + /* Ignore cache errors */ + intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt, + *ip - start_ip, intel_pt_insn); + + return 0; + +out_no_cache: + *insn_cnt_ptr = insn_cnt; + return 0; +} + +static bool intel_pt_get_config(struct intel_pt *pt, + struct perf_event_attr *attr, u64 *config) +{ + if (attr->type == pt->pmu_type) { + if (config) + *config = attr->config; + return true; + } + + return false; +} + +static bool intel_pt_exclude_kernel(struct intel_pt *pt) +{ + struct perf_evsel *evsel; + + evlist__for_each(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->attr, NULL) && + !evsel->attr.exclude_kernel) + return false; + } + return true; +} + +static bool intel_pt_return_compression(struct intel_pt *pt) +{ + struct perf_evsel *evsel; + u64 config; + + if (!pt->noretcomp_bit) + return true; + + evlist__for_each(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->attr, &config) && + (config & pt->noretcomp_bit)) + return false; + } + return true; +} + +static bool intel_pt_timeless_decoding(struct intel_pt *pt) +{ + struct perf_evsel *evsel; + bool timeless_decoding = true; + u64 config; + + if (!pt->tsc_bit || !pt->cap_user_time_zero) + return true; + + evlist__for_each(pt->session->evlist, evsel) { + if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME)) + return true; + if (intel_pt_get_config(pt, &evsel->attr, &config)) { + if (config & pt->tsc_bit) + timeless_decoding = false; + else + return true; + } + } + return timeless_decoding; +} + +static bool intel_pt_tracing_kernel(struct intel_pt *pt) +{ + struct perf_evsel *evsel; + + evlist__for_each(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->attr, NULL) && + !evsel->attr.exclude_kernel) + return true; + } + return false; +} + +static bool intel_pt_have_tsc(struct intel_pt *pt) +{ + struct perf_evsel *evsel; + bool have_tsc = false; + u64 config; + + if (!pt->tsc_bit) + return false; + + evlist__for_each(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->attr, &config)) { + if (config & pt->tsc_bit) + have_tsc = true; + else + return false; + } + } + return have_tsc; +} + +static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns) +{ + u64 quot, rem; + + quot = ns / pt->tc.time_mult; + rem = ns % pt->tc.time_mult; + return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) / + pt->tc.time_mult; +} + +static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, + unsigned int queue_nr) +{ + struct intel_pt_params params = { .get_trace = 0, }; + struct intel_pt_queue *ptq; + + ptq = zalloc(sizeof(struct intel_pt_queue)); + if (!ptq) + return NULL; + + if (pt->synth_opts.callchain) { + size_t sz = sizeof(struct ip_callchain); + + sz += pt->synth_opts.callchain_sz * sizeof(u64); + ptq->chain = zalloc(sz); + if (!ptq->chain) + goto out_free; + } + + ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); + if (!ptq->event_buf) + goto out_free; + + ptq->pt = pt; + ptq->queue_nr = queue_nr; + ptq->exclude_kernel = intel_pt_exclude_kernel(pt); + ptq->pid = -1; + ptq->tid = -1; + ptq->cpu = -1; + ptq->next_tid = -1; + + params.get_trace = intel_pt_get_trace; + params.walk_insn = intel_pt_walk_next_insn; + params.data = ptq; + params.return_compression = intel_pt_return_compression(pt); + params.max_non_turbo_ratio = pt->max_non_turbo_ratio; + + if (pt->synth_opts.instructions) { + if (pt->synth_opts.period) { + switch (pt->synth_opts.period_type) { + case PERF_ITRACE_PERIOD_INSTRUCTIONS: + params.period_type = + INTEL_PT_PERIOD_INSTRUCTIONS; + params.period = pt->synth_opts.period; + break; + case PERF_ITRACE_PERIOD_TICKS: + params.period_type = INTEL_PT_PERIOD_TICKS; + params.period = pt->synth_opts.period; + break; + case PERF_ITRACE_PERIOD_NANOSECS: + params.period_type = INTEL_PT_PERIOD_TICKS; + params.period = intel_pt_ns_to_ticks(pt, + pt->synth_opts.period); + break; + default: + break; + } + } + + if (!params.period) { + params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS; + params.period = 1000; + } + } + + ptq->decoder = intel_pt_decoder_new(¶ms); + if (!ptq->decoder) + goto out_free; + + return ptq; + +out_free: + zfree(&ptq->event_buf); + zfree(&ptq->chain); + free(ptq); + return NULL; +} + +static void intel_pt_free_queue(void *priv) +{ + struct intel_pt_queue *ptq = priv; + + if (!ptq) + return; + thread__zput(ptq->thread); + intel_pt_decoder_free(ptq->decoder); + zfree(&ptq->event_buf); + zfree(&ptq->chain); + free(ptq); +} + +static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt, + struct auxtrace_queue *queue) +{ + struct intel_pt_queue *ptq = queue->priv; + + if (queue->tid == -1 || pt->have_sched_switch) { + ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu); + thread__zput(ptq->thread); + } + + if (!ptq->thread && ptq->tid != -1) + ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid); + + if (ptq->thread) { + ptq->pid = ptq->thread->pid_; + if (queue->cpu == -1) + ptq->cpu = ptq->thread->cpu; + } +} + +static void intel_pt_sample_flags(struct intel_pt_queue *ptq) +{ + if (ptq->state->flags & INTEL_PT_ABORT_TX) { + ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT; + } else if (ptq->state->flags & INTEL_PT_ASYNC) { + if (ptq->state->to_ip) + ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | + PERF_IP_FLAG_ASYNC | + PERF_IP_FLAG_INTERRUPT; + else + ptq->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_TRACE_END; + ptq->insn_len = 0; + } else { + if (ptq->state->from_ip) + ptq->flags = intel_pt_insn_type(ptq->state->insn_op); + else + ptq->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_TRACE_BEGIN; + if (ptq->state->flags & INTEL_PT_IN_TX) + ptq->flags |= PERF_IP_FLAG_IN_TX; + ptq->insn_len = ptq->state->insn_len; + } +} + +static int intel_pt_setup_queue(struct intel_pt *pt, + struct auxtrace_queue *queue, + unsigned int queue_nr) +{ + struct intel_pt_queue *ptq = queue->priv; + + if (list_empty(&queue->head)) + return 0; + + if (!ptq) { + ptq = intel_pt_alloc_queue(pt, queue_nr); + if (!ptq) + return -ENOMEM; + queue->priv = ptq; + + if (queue->cpu != -1) + ptq->cpu = queue->cpu; + ptq->tid = queue->tid; + + if (pt->sampling_mode) { + if (pt->timeless_decoding) + ptq->step_through_buffers = true; + if (pt->timeless_decoding || !pt->have_sched_switch) + ptq->use_buffer_pid_tid = true; + } + } + + if (!ptq->on_heap && + (!pt->sync_switch || + ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) { + const struct intel_pt_state *state; + int ret; + + if (pt->timeless_decoding) + return 0; + + intel_pt_log("queue %u getting timestamp\n", queue_nr); + intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n", + queue_nr, ptq->cpu, ptq->pid, ptq->tid); + while (1) { + state = intel_pt_decode(ptq->decoder); + if (state->err) { + if (state->err == INTEL_PT_ERR_NODATA) { + intel_pt_log("queue %u has no timestamp\n", + queue_nr); + return 0; + } + continue; + } + if (state->timestamp) + break; + } + + ptq->timestamp = state->timestamp; + intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n", + queue_nr, ptq->timestamp); + ptq->state = state; + ptq->have_sample = true; + intel_pt_sample_flags(ptq); + ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp); + if (ret) + return ret; + ptq->on_heap = true; + } + + return 0; +} + +static int intel_pt_setup_queues(struct intel_pt *pt) +{ + unsigned int i; + int ret; + + for (i = 0; i < pt->queues.nr_queues; i++) { + ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i); + if (ret) + return ret; + } + return 0; +} + +static int intel_pt_inject_event(union perf_event *event, + struct perf_sample *sample, u64 type, + bool swapped) +{ + event->header.size = perf_event__sample_event_size(sample, type, 0); + return perf_event__synthesize_sample(event, type, 0, sample, swapped); +} + +static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) +{ + int ret; + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample = { .ip = 0, }; + + event->sample.header.type = PERF_RECORD_SAMPLE; + event->sample.header.misc = PERF_RECORD_MISC_USER; + event->sample.header.size = sizeof(struct perf_event_header); + + if (!pt->timeless_decoding) + sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); + + sample.ip = ptq->state->from_ip; + sample.pid = ptq->pid; + sample.tid = ptq->tid; + sample.addr = ptq->state->to_ip; + sample.id = ptq->pt->branches_id; + sample.stream_id = ptq->pt->branches_id; + sample.period = 1; + sample.cpu = ptq->cpu; + sample.flags = ptq->flags; + sample.insn_len = ptq->insn_len; + + if (pt->branches_filter && !(pt->branches_filter & ptq->flags)) + return 0; + + if (pt->synth_opts.inject) { + ret = intel_pt_inject_event(event, &sample, + pt->branches_sample_type, + pt->synth_needs_swap); + if (ret) + return ret; + } + + ret = perf_session__deliver_synth_event(pt->session, event, &sample); + if (ret) + pr_err("Intel Processor Trace: failed to deliver branch event, error %d\n", + ret); + + return ret; +} + +static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) +{ + int ret; + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample = { .ip = 0, }; + + event->sample.header.type = PERF_RECORD_SAMPLE; + event->sample.header.misc = PERF_RECORD_MISC_USER; + event->sample.header.size = sizeof(struct perf_event_header); + + if (!pt->timeless_decoding) + sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); + + sample.ip = ptq->state->from_ip; + sample.pid = ptq->pid; + sample.tid = ptq->tid; + sample.addr = ptq->state->to_ip; + sample.id = ptq->pt->instructions_id; + sample.stream_id = ptq->pt->instructions_id; + sample.period = ptq->pt->instructions_sample_period; + sample.cpu = ptq->cpu; + sample.flags = ptq->flags; + sample.insn_len = ptq->insn_len; + + if (pt->synth_opts.callchain) { + thread_stack__sample(ptq->thread, ptq->chain, + pt->synth_opts.callchain_sz, sample.ip); + sample.callchain = ptq->chain; + } + + if (pt->synth_opts.inject) { + ret = intel_pt_inject_event(event, &sample, + pt->instructions_sample_type, + pt->synth_needs_swap); + if (ret) + return ret; + } + + ret = perf_session__deliver_synth_event(pt->session, event, &sample); + if (ret) + pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n", + ret); + + return ret; +} + +static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) +{ + int ret; + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample = { .ip = 0, }; + + event->sample.header.type = PERF_RECORD_SAMPLE; + event->sample.header.misc = PERF_RECORD_MISC_USER; + event->sample.header.size = sizeof(struct perf_event_header); + + if (!pt->timeless_decoding) + sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); + + sample.ip = ptq->state->from_ip; + sample.pid = ptq->pid; + sample.tid = ptq->tid; + sample.addr = ptq->state->to_ip; + sample.id = ptq->pt->transactions_id; + sample.stream_id = ptq->pt->transactions_id; + sample.period = 1; + sample.cpu = ptq->cpu; + sample.flags = ptq->flags; + sample.insn_len = ptq->insn_len; + + if (pt->synth_opts.callchain) { + thread_stack__sample(ptq->thread, ptq->chain, + pt->synth_opts.callchain_sz, sample.ip); + sample.callchain = ptq->chain; + } + + if (pt->synth_opts.inject) { + ret = intel_pt_inject_event(event, &sample, + pt->transactions_sample_type, + pt->synth_needs_swap); + if (ret) + return ret; + } + + ret = perf_session__deliver_synth_event(pt->session, event, &sample); + if (ret) + pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n", + ret); + + return ret; +} + +static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, + pid_t pid, pid_t tid, u64 ip) +{ + union perf_event event; + char msg[MAX_AUXTRACE_ERROR_MSG]; + int err; + + intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG); + + auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, + code, cpu, pid, tid, ip, msg); + + err = perf_session__deliver_synth_event(pt->session, &event, NULL); + if (err) + pr_err("Intel Processor Trace: failed to deliver error event, error %d\n", + err); + + return err; +} + +static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq) +{ + struct auxtrace_queue *queue; + pid_t tid = ptq->next_tid; + int err; + + if (tid == -1) + return 0; + + intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid); + + err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid); + + queue = &pt->queues.queue_array[ptq->queue_nr]; + intel_pt_set_pid_tid_cpu(pt, queue); + + ptq->next_tid = -1; + + return err; +} + +static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip) +{ + struct intel_pt *pt = ptq->pt; + + return ip == pt->switch_ip && + (ptq->flags & PERF_IP_FLAG_BRANCH) && + !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC | + PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT)); +} + +static int intel_pt_sample(struct intel_pt_queue *ptq) +{ + const struct intel_pt_state *state = ptq->state; + struct intel_pt *pt = ptq->pt; + int err; + + if (!ptq->have_sample) + return 0; + + ptq->have_sample = false; + + if (pt->sample_instructions && + (state->type & INTEL_PT_INSTRUCTION)) { + err = intel_pt_synth_instruction_sample(ptq); + if (err) + return err; + } + + if (pt->sample_transactions && + (state->type & INTEL_PT_TRANSACTION)) { + err = intel_pt_synth_transaction_sample(ptq); + if (err) + return err; + } + + if (!(state->type & INTEL_PT_BRANCH)) + return 0; + + if (pt->synth_opts.callchain) + thread_stack__event(ptq->thread, ptq->flags, state->from_ip, + state->to_ip, ptq->insn_len, + state->trace_nr); + else + thread_stack__set_trace_nr(ptq->thread, state->trace_nr); + + if (pt->sample_branches) { + err = intel_pt_synth_branch_sample(ptq); + if (err) + return err; + } + + if (!pt->sync_switch) + return 0; + + if (intel_pt_is_switch_ip(ptq, state->to_ip)) { + switch (ptq->switch_state) { + case INTEL_PT_SS_UNKNOWN: + case INTEL_PT_SS_EXPECTING_SWITCH_IP: + err = intel_pt_next_tid(pt, ptq); + if (err) + return err; + ptq->switch_state = INTEL_PT_SS_TRACING; + break; + default: + ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT; + return 1; + } + } else if (!state->to_ip) { + ptq->switch_state = INTEL_PT_SS_NOT_TRACING; + } else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) { + ptq->switch_state = INTEL_PT_SS_UNKNOWN; + } else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN && + state->to_ip == pt->ptss_ip && + (ptq->flags & PERF_IP_FLAG_CALL)) { + ptq->switch_state = INTEL_PT_SS_TRACING; + } + + return 0; +} + +static u64 intel_pt_switch_ip(struct machine *machine, u64 *ptss_ip) +{ + struct map *map; + struct symbol *sym, *start; + u64 ip, switch_ip = 0; + + if (ptss_ip) + *ptss_ip = 0; + + map = machine__kernel_map(machine, MAP__FUNCTION); + if (!map) + return 0; + + if (map__load(map, machine->symbol_filter)) + return 0; + + start = dso__first_symbol(map->dso, MAP__FUNCTION); + + for (sym = start; sym; sym = dso__next_symbol(sym)) { + if (sym->binding == STB_GLOBAL && + !strcmp(sym->name, "__switch_to")) { + ip = map->unmap_ip(map, sym->start); + if (ip >= map->start && ip < map->end) { + switch_ip = ip; + break; + } + } + } + + if (!switch_ip || !ptss_ip) + return 0; + + for (sym = start; sym; sym = dso__next_symbol(sym)) { + if (!strcmp(sym->name, "perf_trace_sched_switch")) { + ip = map->unmap_ip(map, sym->start); + if (ip >= map->start && ip < map->end) { + *ptss_ip = ip; + break; + } + } + } + + return switch_ip; +} + +static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) +{ + const struct intel_pt_state *state = ptq->state; + struct intel_pt *pt = ptq->pt; + int err; + + if (!pt->kernel_start) { + pt->kernel_start = machine__kernel_start(pt->machine); + if (pt->per_cpu_mmaps && pt->have_sched_switch && + !pt->timeless_decoding && intel_pt_tracing_kernel(pt) && + !pt->sampling_mode) { + pt->switch_ip = intel_pt_switch_ip(pt->machine, + &pt->ptss_ip); + if (pt->switch_ip) { + intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n", + pt->switch_ip, pt->ptss_ip); + pt->sync_switch = true; + } + } + } + + intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n", + ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid); + while (1) { + err = intel_pt_sample(ptq); + if (err) + return err; + + state = intel_pt_decode(ptq->decoder); + if (state->err) { + if (state->err == INTEL_PT_ERR_NODATA) + return 1; + if (pt->sync_switch && + state->from_ip >= pt->kernel_start) { + pt->sync_switch = false; + intel_pt_next_tid(pt, ptq); + } + if (pt->synth_opts.errors) { + err = intel_pt_synth_error(pt, state->err, + ptq->cpu, ptq->pid, + ptq->tid, + state->from_ip); + if (err) + return err; + } + continue; + } + + ptq->state = state; + ptq->have_sample = true; + intel_pt_sample_flags(ptq); + + /* Use estimated TSC upon return to user space */ + if (pt->est_tsc && + (state->from_ip >= pt->kernel_start || !state->from_ip) && + state->to_ip && state->to_ip < pt->kernel_start) { + intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n", + state->timestamp, state->est_timestamp); + ptq->timestamp = state->est_timestamp; + /* Use estimated TSC in unknown switch state */ + } else if (pt->sync_switch && + ptq->switch_state == INTEL_PT_SS_UNKNOWN && + intel_pt_is_switch_ip(ptq, state->to_ip) && + ptq->next_tid == -1) { + intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n", + state->timestamp, state->est_timestamp); + ptq->timestamp = state->est_timestamp; + } else if (state->timestamp > ptq->timestamp) { + ptq->timestamp = state->timestamp; + } + + if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) { + *timestamp = ptq->timestamp; + return 0; + } + } + return 0; +} + +static inline int intel_pt_update_queues(struct intel_pt *pt) +{ + if (pt->queues.new_data) { + pt->queues.new_data = false; + return intel_pt_setup_queues(pt); + } + return 0; +} + +static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp) +{ + unsigned int queue_nr; + u64 ts; + int ret; + + while (1) { + struct auxtrace_queue *queue; + struct intel_pt_queue *ptq; + + if (!pt->heap.heap_cnt) + return 0; + + if (pt->heap.heap_array[0].ordinal >= timestamp) + return 0; + + queue_nr = pt->heap.heap_array[0].queue_nr; + queue = &pt->queues.queue_array[queue_nr]; + ptq = queue->priv; + + intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n", + queue_nr, pt->heap.heap_array[0].ordinal, + timestamp); + + auxtrace_heap__pop(&pt->heap); + + if (pt->heap.heap_cnt) { + ts = pt->heap.heap_array[0].ordinal + 1; + if (ts > timestamp) + ts = timestamp; + } else { + ts = timestamp; + } + + intel_pt_set_pid_tid_cpu(pt, queue); + + ret = intel_pt_run_decoder(ptq, &ts); + + if (ret < 0) { + auxtrace_heap__add(&pt->heap, queue_nr, ts); + return ret; + } + + if (!ret) { + ret = auxtrace_heap__add(&pt->heap, queue_nr, ts); + if (ret < 0) + return ret; + } else { + ptq->on_heap = false; + } + } + + return 0; +} + +static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid, + u64 time_) +{ + struct auxtrace_queues *queues = &pt->queues; + unsigned int i; + u64 ts = 0; + + for (i = 0; i < queues->nr_queues; i++) { + struct auxtrace_queue *queue = &pt->queues.queue_array[i]; + struct intel_pt_queue *ptq = queue->priv; + + if (ptq && (tid == -1 || ptq->tid == tid)) { + ptq->time = time_; + intel_pt_set_pid_tid_cpu(pt, queue); + intel_pt_run_decoder(ptq, &ts); + } + } + return 0; +} + +static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample) +{ + return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu, + sample->pid, sample->tid, 0); +} + +static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu) +{ + unsigned i, j; + + if (cpu < 0 || !pt->queues.nr_queues) + return NULL; + + if ((unsigned)cpu >= pt->queues.nr_queues) + i = pt->queues.nr_queues - 1; + else + i = cpu; + + if (pt->queues.queue_array[i].cpu == cpu) + return pt->queues.queue_array[i].priv; + + for (j = 0; i > 0; j++) { + if (pt->queues.queue_array[--i].cpu == cpu) + return pt->queues.queue_array[i].priv; + } + + for (; j < pt->queues.nr_queues; j++) { + if (pt->queues.queue_array[j].cpu == cpu) + return pt->queues.queue_array[j].priv; + } + + return NULL; +} + +static int intel_pt_process_switch(struct intel_pt *pt, + struct perf_sample *sample) +{ + struct intel_pt_queue *ptq; + struct perf_evsel *evsel; + pid_t tid; + int cpu, err; + + evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id); + if (evsel != pt->switch_evsel) + return 0; + + tid = perf_evsel__intval(evsel, sample, "next_pid"); + cpu = sample->cpu; + + intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", + cpu, tid, sample->time, perf_time_to_tsc(sample->time, + &pt->tc)); + + if (!pt->sync_switch) + goto out; + + ptq = intel_pt_cpu_to_ptq(pt, cpu); + if (!ptq) + goto out; + + switch (ptq->switch_state) { + case INTEL_PT_SS_NOT_TRACING: + ptq->next_tid = -1; + break; + case INTEL_PT_SS_UNKNOWN: + case INTEL_PT_SS_TRACING: + ptq->next_tid = tid; + ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP; + return 0; + case INTEL_PT_SS_EXPECTING_SWITCH_EVENT: + if (!ptq->on_heap) { + ptq->timestamp = perf_time_to_tsc(sample->time, + &pt->tc); + err = auxtrace_heap__add(&pt->heap, ptq->queue_nr, + ptq->timestamp); + if (err) + return err; + ptq->on_heap = true; + } + ptq->switch_state = INTEL_PT_SS_TRACING; + break; + case INTEL_PT_SS_EXPECTING_SWITCH_IP: + ptq->next_tid = tid; + intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu); + break; + default: + break; + } +out: + return machine__set_current_tid(pt->machine, cpu, -1, tid); +} + +static int intel_pt_process_itrace_start(struct intel_pt *pt, + union perf_event *event, + struct perf_sample *sample) +{ + if (!pt->per_cpu_mmaps) + return 0; + + intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", + sample->cpu, event->itrace_start.pid, + event->itrace_start.tid, sample->time, + perf_time_to_tsc(sample->time, &pt->tc)); + + return machine__set_current_tid(pt->machine, sample->cpu, + event->itrace_start.pid, + event->itrace_start.tid); +} + +static int intel_pt_process_event(struct perf_session *session, + union perf_event *event, + struct perf_sample *sample, + struct perf_tool *tool) +{ + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + u64 timestamp; + int err = 0; + + if (dump_trace) + return 0; + + if (!tool->ordered_events) { + pr_err("Intel Processor Trace requires ordered events\n"); + return -EINVAL; + } + + if (sample->time) + timestamp = perf_time_to_tsc(sample->time, &pt->tc); + else + timestamp = 0; + + if (timestamp || pt->timeless_decoding) { + err = intel_pt_update_queues(pt); + if (err) + return err; + } + + if (pt->timeless_decoding) { + if (event->header.type == PERF_RECORD_EXIT) { + err = intel_pt_process_timeless_queues(pt, + event->comm.tid, + sample->time); + } + } else if (timestamp) { + err = intel_pt_process_queues(pt, timestamp); + } + if (err) + return err; + + if (event->header.type == PERF_RECORD_AUX && + (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) && + pt->synth_opts.errors) { + err = intel_pt_lost(pt, sample); + if (err) + return err; + } + + if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE) + err = intel_pt_process_switch(pt, sample); + else if (event->header.type == PERF_RECORD_ITRACE_START) + err = intel_pt_process_itrace_start(pt, event, sample); + + intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n", + perf_event__name(event->header.type), event->header.type, + sample->cpu, sample->time, timestamp); + + return err; +} + +static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool) +{ + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + int ret; + + if (dump_trace) + return 0; + + if (!tool->ordered_events) + return -EINVAL; + + ret = intel_pt_update_queues(pt); + if (ret < 0) + return ret; + + if (pt->timeless_decoding) + return intel_pt_process_timeless_queues(pt, -1, + MAX_TIMESTAMP - 1); + + return intel_pt_process_queues(pt, MAX_TIMESTAMP); +} + +static void intel_pt_free_events(struct perf_session *session) +{ + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + struct auxtrace_queues *queues = &pt->queues; + unsigned int i; + + for (i = 0; i < queues->nr_queues; i++) { + intel_pt_free_queue(queues->queue_array[i].priv); + queues->queue_array[i].priv = NULL; + } + intel_pt_log_disable(); + auxtrace_queues__free(queues); +} + +static void intel_pt_free(struct perf_session *session) +{ + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + + auxtrace_heap__free(&pt->heap); + intel_pt_free_events(session); + session->auxtrace = NULL; + thread__delete(pt->unknown_thread); + free(pt); +} + +static int intel_pt_process_auxtrace_event(struct perf_session *session, + union perf_event *event, + struct perf_tool *tool __maybe_unused) +{ + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + + if (pt->sampling_mode) + return 0; + + if (!pt->data_queued) { + struct auxtrace_buffer *buffer; + off_t data_offset; + int fd = perf_data_file__fd(session->file); + int err; + + if (perf_data_file__is_pipe(session->file)) { + data_offset = 0; + } else { + data_offset = lseek(fd, 0, SEEK_CUR); + if (data_offset == -1) + return -errno; + } + + err = auxtrace_queues__add_event(&pt->queues, session, event, + data_offset, &buffer); + if (err) + return err; + + /* Dump here now we have copied a piped trace out of the pipe */ + if (dump_trace) { + if (auxtrace_buffer__get_data(buffer, fd)) { + intel_pt_dump_event(pt, buffer->data, + buffer->size); + auxtrace_buffer__put_data(buffer); + } + } + } + + return 0; +} + +struct intel_pt_synth { + struct perf_tool dummy_tool; + struct perf_session *session; +}; + +static int intel_pt_event_synth(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + struct intel_pt_synth *intel_pt_synth = + container_of(tool, struct intel_pt_synth, dummy_tool); + + return perf_session__deliver_synth_event(intel_pt_synth->session, event, + NULL); +} + +static int intel_pt_synth_event(struct perf_session *session, + struct perf_event_attr *attr, u64 id) +{ + struct intel_pt_synth intel_pt_synth; + + memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth)); + intel_pt_synth.session = session; + + return perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1, + &id, intel_pt_event_synth); +} + +static int intel_pt_synth_events(struct intel_pt *pt, + struct perf_session *session) +{ + struct perf_evlist *evlist = session->evlist; + struct perf_evsel *evsel; + struct perf_event_attr attr; + bool found = false; + u64 id; + int err; + + evlist__for_each(evlist, evsel) { + if (evsel->attr.type == pt->pmu_type && evsel->ids) { + found = true; + break; + } + } + + if (!found) { + pr_debug("There are no selected events with Intel Processor Trace data\n"); + return 0; + } + + memset(&attr, 0, sizeof(struct perf_event_attr)); + attr.size = sizeof(struct perf_event_attr); + attr.type = PERF_TYPE_HARDWARE; + attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK; + attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | + PERF_SAMPLE_PERIOD; + if (pt->timeless_decoding) + attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; + else + attr.sample_type |= PERF_SAMPLE_TIME; + if (!pt->per_cpu_mmaps) + attr.sample_type &= ~(u64)PERF_SAMPLE_CPU; + attr.exclude_user = evsel->attr.exclude_user; + attr.exclude_kernel = evsel->attr.exclude_kernel; + attr.exclude_hv = evsel->attr.exclude_hv; + attr.exclude_host = evsel->attr.exclude_host; + attr.exclude_guest = evsel->attr.exclude_guest; + attr.sample_id_all = evsel->attr.sample_id_all; + attr.read_format = evsel->attr.read_format; + + id = evsel->id[0] + 1000000000; + if (!id) + id = 1; + + if (pt->synth_opts.instructions) { + attr.config = PERF_COUNT_HW_INSTRUCTIONS; + if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS) + attr.sample_period = + intel_pt_ns_to_ticks(pt, pt->synth_opts.period); + else + attr.sample_period = pt->synth_opts.period; + pt->instructions_sample_period = attr.sample_period; + if (pt->synth_opts.callchain) + attr.sample_type |= PERF_SAMPLE_CALLCHAIN; + pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n", + id, (u64)attr.sample_type); + err = intel_pt_synth_event(session, &attr, id); + if (err) { + pr_err("%s: failed to synthesize 'instructions' event type\n", + __func__); + return err; + } + pt->sample_instructions = true; + pt->instructions_sample_type = attr.sample_type; + pt->instructions_id = id; + id += 1; + } + + if (pt->synth_opts.transactions) { + attr.config = PERF_COUNT_HW_INSTRUCTIONS; + attr.sample_period = 1; + if (pt->synth_opts.callchain) + attr.sample_type |= PERF_SAMPLE_CALLCHAIN; + pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n", + id, (u64)attr.sample_type); + err = intel_pt_synth_event(session, &attr, id); + if (err) { + pr_err("%s: failed to synthesize 'transactions' event type\n", + __func__); + return err; + } + pt->sample_transactions = true; + pt->transactions_id = id; + id += 1; + evlist__for_each(evlist, evsel) { + if (evsel->id && evsel->id[0] == pt->transactions_id) { + if (evsel->name) + zfree(&evsel->name); + evsel->name = strdup("transactions"); + break; + } + } + } + + if (pt->synth_opts.branches) { + attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; + attr.sample_period = 1; + attr.sample_type |= PERF_SAMPLE_ADDR; + attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN; + pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n", + id, (u64)attr.sample_type); + err = intel_pt_synth_event(session, &attr, id); + if (err) { + pr_err("%s: failed to synthesize 'branches' event type\n", + __func__); + return err; + } + pt->sample_branches = true; + pt->branches_sample_type = attr.sample_type; + pt->branches_id = id; + } + + pt->synth_needs_swap = evsel->needs_swap; + + return 0; +} + +static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + evlist__for_each_reverse(evlist, evsel) { + const char *name = perf_evsel__name(evsel); + + if (!strcmp(name, "sched:sched_switch")) + return evsel; + } + + return NULL; +} + +static const char * const intel_pt_info_fmts[] = { + [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", + [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", + [INTEL_PT_TIME_MULT] = " Time Muliplier %"PRIu64"\n", + [INTEL_PT_TIME_ZERO] = " Time Zero %"PRIu64"\n", + [INTEL_PT_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n", + [INTEL_PT_TSC_BIT] = " TSC bit %#"PRIx64"\n", + [INTEL_PT_NORETCOMP_BIT] = " NoRETComp bit %#"PRIx64"\n", + [INTEL_PT_HAVE_SCHED_SWITCH] = " Have sched_switch %"PRId64"\n", + [INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n", + [INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n", +}; + +static void intel_pt_print_info(u64 *arr, int start, int finish) +{ + int i; + + if (!dump_trace) + return; + + for (i = start; i <= finish; i++) + fprintf(stdout, intel_pt_info_fmts[i], arr[i]); +} + +int intel_pt_process_auxtrace_info(union perf_event *event, + struct perf_session *session) +{ + struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info; + size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS; + struct intel_pt *pt; + int err; + + if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) + + min_sz) + return -EINVAL; + + pt = zalloc(sizeof(struct intel_pt)); + if (!pt) + return -ENOMEM; + + err = auxtrace_queues__init(&pt->queues); + if (err) + goto err_free; + + intel_pt_log_set_name(INTEL_PT_PMU_NAME); + + pt->session = session; + pt->machine = &session->machines.host; /* No kvm support */ + pt->auxtrace_type = auxtrace_info->type; + pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE]; + pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT]; + pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT]; + pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO]; + pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO]; + pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT]; + pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT]; + pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH]; + pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE]; + pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS]; + intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE, + INTEL_PT_PER_CPU_MMAPS); + + pt->timeless_decoding = intel_pt_timeless_decoding(pt); + pt->have_tsc = intel_pt_have_tsc(pt); + pt->sampling_mode = false; + pt->est_tsc = !pt->timeless_decoding; + + pt->unknown_thread = thread__new(999999999, 999999999); + if (!pt->unknown_thread) { + err = -ENOMEM; + goto err_free_queues; + } + err = thread__set_comm(pt->unknown_thread, "unknown", 0); + if (err) + goto err_delete_thread; + if (thread__init_map_groups(pt->unknown_thread, pt->machine)) { + err = -ENOMEM; + goto err_delete_thread; + } + + pt->auxtrace.process_event = intel_pt_process_event; + pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event; + pt->auxtrace.flush_events = intel_pt_flush; + pt->auxtrace.free_events = intel_pt_free_events; + pt->auxtrace.free = intel_pt_free; + session->auxtrace = &pt->auxtrace; + + if (dump_trace) + return 0; + + if (pt->have_sched_switch == 1) { + pt->switch_evsel = intel_pt_find_sched_switch(session->evlist); + if (!pt->switch_evsel) { + pr_err("%s: missing sched_switch event\n", __func__); + goto err_delete_thread; + } + } + + if (session->itrace_synth_opts && session->itrace_synth_opts->set) { + pt->synth_opts = *session->itrace_synth_opts; + } else { + itrace_synth_opts__set_default(&pt->synth_opts); + if (use_browser != -1) { + pt->synth_opts.branches = false; + pt->synth_opts.callchain = true; + } + } + + if (pt->synth_opts.log) + intel_pt_log_enable(); + + /* Maximum non-turbo ratio is TSC freq / 100 MHz */ + if (pt->tc.time_mult) { + u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000); + + pt->max_non_turbo_ratio = (tsc_freq + 50000000) / 100000000; + intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq); + intel_pt_log("Maximum non-turbo ratio %u\n", + pt->max_non_turbo_ratio); + } + + if (pt->synth_opts.calls) + pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | + PERF_IP_FLAG_TRACE_END; + if (pt->synth_opts.returns) + pt->branches_filter |= PERF_IP_FLAG_RETURN | + PERF_IP_FLAG_TRACE_BEGIN; + + if (pt->synth_opts.callchain && !symbol_conf.use_callchain) { + symbol_conf.use_callchain = true; + if (callchain_register_param(&callchain_param) < 0) { + symbol_conf.use_callchain = false; + pt->synth_opts.callchain = false; + } + } + + err = intel_pt_synth_events(pt, session); + if (err) + goto err_delete_thread; + + err = auxtrace_queues__process_index(&pt->queues, session); + if (err) + goto err_delete_thread; + + if (pt->queues.populated) + pt->data_queued = true; + + if (pt->timeless_decoding) + pr_debug2("Intel PT decoding without timestamps\n"); + + return 0; + +err_delete_thread: + thread__delete(pt->unknown_thread); +err_free_queues: + intel_pt_log_disable(); + auxtrace_queues__free(&pt->queues); + session->auxtrace = NULL; +err_free: + free(pt); + return err; +} diff --git a/tools/perf/util/intel-pt.h b/tools/perf/util/intel-pt.h new file mode 100644 index 000000000000..a1bfe93473ba --- /dev/null +++ b/tools/perf/util/intel-pt.h @@ -0,0 +1,51 @@ +/* + * intel_pt.h: Intel Processor Trace support + * Copyright (c) 2013-2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef INCLUDE__PERF_INTEL_PT_H__ +#define INCLUDE__PERF_INTEL_PT_H__ + +#define INTEL_PT_PMU_NAME "intel_pt" + +enum { + INTEL_PT_PMU_TYPE, + INTEL_PT_TIME_SHIFT, + INTEL_PT_TIME_MULT, + INTEL_PT_TIME_ZERO, + INTEL_PT_CAP_USER_TIME_ZERO, + INTEL_PT_TSC_BIT, + INTEL_PT_NORETCOMP_BIT, + INTEL_PT_HAVE_SCHED_SWITCH, + INTEL_PT_SNAPSHOT_MODE, + INTEL_PT_PER_CPU_MMAPS, + INTEL_PT_AUXTRACE_PRIV_MAX, +}; + +#define INTEL_PT_AUXTRACE_PRIV_SIZE (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64)) + +struct auxtrace_record; +struct perf_tool; +union perf_event; +struct perf_session; +struct perf_event_attr; +struct perf_pmu; + +struct auxtrace_record *intel_pt_recording_init(int *err); + +int intel_pt_process_auxtrace_info(union perf_event *event, + struct perf_session *session); + +struct perf_event_attr *intel_pt_pmu_default_config(struct perf_pmu *pmu); + +#endif diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 84cad054d6f7..3c71138e7672 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -462,8 +462,8 @@ static struct perf_pmu *pmu_lookup(const char *name) LIST_HEAD(aliases); __u32 type; - /* No support for intel_bts or intel_pt so disallow them */ - if (!strcmp(name, "intel_bts") || !strcmp(name, "intel_pt")) + /* No support for intel_bts so disallow it */ + if (!strcmp(name, "intel_bts")) return NULL; /* diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 0d228a29526d..0467367dc315 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -105,6 +105,30 @@ bool perf_can_record_switch_events(void) return perf_probe_api(perf_probe_context_switch); } +bool perf_can_record_cpu_wide(void) +{ + struct perf_event_attr attr = { + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_CPU_CLOCK, + .exclude_kernel = 1, + }; + struct cpu_map *cpus; + int cpu, fd; + + cpus = cpu_map__new(NULL); + if (!cpus) + return false; + cpu = cpus->map[0]; + cpu_map__put(cpus); + + fd = sys_perf_event_open(&attr, -1, cpu, -1, 0); + if (fd < 0) + return false; + close(fd); + + return true; +} + void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts) { struct perf_evsel *evsel; diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index b0ad810f04dd..53bb5f59ec58 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -875,6 +875,17 @@ int dso__load_sym(struct dso *dso, struct map *map, } } + /* + * Handle any relocation of vdso necessary because older kernels + * attempted to prelink vdso to its virtual address. + */ + if (dso__is_vdso(dso)) { + GElf_Shdr tshdr; + + if (elf_section_by_name(elf, &ehdr, &tshdr, ".text", NULL)) + map->reloc = map->start - tshdr.sh_addr + tshdr.sh_offset; + } + dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap); /* * Initial kernel and module mappings do not map to the dso. For |