summaryrefslogtreecommitdiffstats
path: root/tools/perf/util
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-03-27 13:42:32 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-03-27 13:42:32 -0700
commit7b58b82b86c8b65a2b57a4c6cb96a460654f9e09 (patch)
treea13e19f216389f16f1cb6641d54751f167482515 /tools/perf/util
parent02f9a04d76b76b80b05ddc33ceabe806b84fda3c (diff)
parentab0809af0bee88b689ba289ec8c40aa2be3a17ec (diff)
downloadlinux-7b58b82b86c8b65a2b57a4c6cb96a460654f9e09.tar.bz2
Merge tag 'perf-tools-for-v5.18-2022-03-26' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tools updates from Arnaldo Carvalho de Melo: "New features: perf ftrace: - Add -n/--use-nsec option to the 'latency' subcommand. Default: usecs: $ sudo perf ftrace latency -T dput -a sleep 1 # DURATION | COUNT | GRAPH | 0 - 1 us | 2098375 | ############################# | 1 - 2 us | 61 | | 2 - 4 us | 33 | | 4 - 8 us | 13 | | 8 - 16 us | 124 | | 16 - 32 us | 123 | | 32 - 64 us | 1 | | 64 - 128 us | 0 | | 128 - 256 us | 1 | | 256 - 512 us | 0 | | Better granularity with nsec: $ sudo perf ftrace latency -T dput -a -n sleep 1 # DURATION | COUNT | GRAPH | 0 - 1 us | 0 | | 1 - 2 ns | 0 | | 2 - 4 ns | 0 | | 4 - 8 ns | 0 | | 8 - 16 ns | 0 | | 16 - 32 ns | 0 | | 32 - 64 ns | 0 | | 64 - 128 ns | 1163434 | ############## | 128 - 256 ns | 914102 | ############# | 256 - 512 ns | 884 | | 512 - 1024 ns | 613 | | 1 - 2 us | 31 | | 2 - 4 us | 17 | | 4 - 8 us | 7 | | 8 - 16 us | 123 | | 16 - 32 us | 83 | | perf lock: - Add -c/--combine-locks option to merge lock instances in the same class into a single entry. # perf lock report -c Name acquired contended avg wait(ns) total wait(ns) max wait(ns) min wait(ns) rcu_read_lock 251225 0 0 0 0 0 hrtimer_bases.lock 39450 0 0 0 0 0 &sb->s_type->i_l... 10301 1 662 662 662 662 ptlock_ptr(page) 10173 2 701 1402 760 642 &(ei->i_block_re... 8732 0 0 0 0 0 &xa->xa_lock 8088 0 0 0 0 0 &base->lock 6705 0 0 0 0 0 &p->pi_lock 5549 0 0 0 0 0 &dentry->d_lockr... 5010 4 1274 5097 1844 789 &ep->lock 3958 0 0 0 0 0 - Add -F/--field option to customize the list of fields to output: $ perf lock report -F contended,wait_max -k avg_wait Name contended max wait(ns) avg wait(ns) slock-AF_INET6 1 23543 23543 &lruvec->lru_lock 5 18317 11254 slock-AF_INET6 1 10379 10379 rcu_node_1 1 2104 2104 &dentry->d_lockr... 1 1844 1844 &dentry->d_lockr... 1 1672 1672 &newf->file_lock 15 2279 1025 &dentry->d_lockr... 1 792 792 - Add --synth=no option for record, as there is no need to symbolize, lock names comes from the tracepoints. perf record: - Threaded recording, opt-in, via the new --threads command line option. - Improve AMD IBS (Instruction-Based Sampling) error handling messages. perf script: - Add 'brstackinsnlen' field (use it with -F) for branch stacks. - Output branch sample type in 'perf script'. perf report: - Add "addr_from" and "addr_to" sort dimensions. - Print branch stack entry type in 'perf report --dump-raw-trace' - Fix symbolization for chrooted workloads. Hardware tracing: Intel PT: - Add CFE (Control Flow Event) and EVD (Event Data) packets support. - Add MODE.Exec IFLAG bit support. Explanation about these features from the "Intel® 64 and IA-32 architectures software developer’s manual combined volumes: 1, 2A, 2B, 2C, 2D, 3A, 3B, 3C, 3D, and 4" PDF at: https://cdrdv2.intel.com/v1/dl/getContent/671200 At page 3951: "32.2.4 Event Trace is a capability that exposes details about the asynchronous events, when they are generated, and when their corresponding software event handler completes execution. These include: o Interrupts, including NMI and SMI, including the interrupt vector when defined. o Faults, exceptions including the fault vector. - Page faults additionally include the page fault address, when in context. o Event handler returns, including IRET and RSM. o VM exits and VM entries.¹ - VM exits include the values written to the “exit reason” and “exit qualification” VMCS fields. INIT and SIPI events. o TSX aborts, including the abort status returned for the RTM instructions. o Shutdown. Additionally, it provides indication of the status of the Interrupt Flag (IF), to indicate when interrupts are masked" ARM CoreSight: - Use advertised caps/min_interval as default sample_period on ARM spe. - Update deduction of TRCCONFIGR register for branch broadcast on ARM's CoreSight ETM. Vendor Events (JSON): Intel: - Update events and metrics for: Alderlake, Broadwell, Broadwell DE, BroadwellX, CascadelakeX, Elkhartlake, Bonnell, Goldmont, GoldmontPlus, Westmere EP-DP, Haswell, HaswellX, Icelake, IcelakeX, Ivybridge, Ivytown, Jaketown, Knights Landing, Nehalem EP, Sandybridge, Silvermont, Skylake, Skylake Server, SkylakeX, Tigerlake, TremontX, Westmere EP-SP, and Westmere EX. ARM: - Add support for HiSilicon CPA PMU aliasing. perf stat: - Fix forked applications enablement of counters. - The 'slots' should only be printed on a different order than the one specified on the command line when 'topdown' events are present, fix it. Miscellaneous: - Sync msr-index, cpufeatures header files with the kernel sources. - Stop using some deprecated libbpf APIs in 'perf trace'. - Fix some spelling mistakes. - Refactor the maps pointers usage to pave the way for using refcount debugging. - Only offer the --tui option on perf top, report and annotate when perf was built with libslang. - Don't mention --to-ctf in 'perf data --help' when not linking with the required library, libbabeltrace. - Use ARRAY_SIZE() instead of ad hoc equivalent, spotted by array_size.cocci. - Enhance the matching of sub-commands abbreviations: 'perf c2c rec' -> 'perf c2c record' 'perf c2c recport -> error - Set build-id using build-id header on new mmap records. - Fix generation of 'perf --version' string. perf test: - Add test for the arm_spe event. - Add test to check unwinding using fame-pointer (fp) mode on arm64. - Make metric testing more robust in 'perf test'. - Add error message for unsupported branch stack cases. libperf: - Add API for allocating new thread map array. - Fix typo in perf_evlist__open() failure error messages in libperf tests. perf c2c: - Replace bitmap_weight() with bitmap_empty() where appropriate" * tag 'perf-tools-for-v5.18-2022-03-26' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (143 commits) perf evsel: Improve AMD IBS (Instruction-Based Sampling) error handling messages perf python: Add perf_env stubs that will be needed in evsel__open_strerror() perf tools: Enhance the matching of sub-commands abbreviations libperf tests: Fix typo in perf_evlist__open() failure error messages tools arm64: Import cputype.h perf lock: Add -F/--field option to control output perf lock: Extend struct lock_key to have print function perf lock: Add --synth=no option for record tools headers cpufeatures: Sync with the kernel sources tools headers cpufeatures: Sync with the kernel sources perf stat: Fix forked applications enablement of counters tools arch x86: Sync the msr-index.h copy with the kernel sources perf evsel: Make evsel__env() always return a valid env perf build-id: Fix spelling mistake "Cant" -> "Can't" perf header: Fix spelling mistake "could't" -> "couldn't" perf script: Add 'brstackinsnlen' for branch stacks perf parse-events: Move slots only with topdown perf ftrace latency: Update documentation perf ftrace latency: Add -n/--use-nsec option perf tools: Fix version kernel tag ...
Diffstat (limited to 'tools/perf/util')
-rw-r--r--tools/perf/util/Build1
-rwxr-xr-xtools/perf/util/PERF-VERSION-GEN13
-rw-r--r--tools/perf/util/annotate.c10
-rw-r--r--tools/perf/util/auxtrace.c4
-rw-r--r--tools/perf/util/auxtrace.h4
-rw-r--r--tools/perf/util/bpf-event.c15
-rw-r--r--tools/perf/util/bpf-loader.c254
-rw-r--r--tools/perf/util/bpf_ftrace.c2
-rw-r--r--tools/perf/util/bpf_skel/func_latency.bpf.c6
-rw-r--r--tools/perf/util/build-id.c6
-rw-r--r--tools/perf/util/callchain.c2
-rw-r--r--tools/perf/util/data.c8
-rw-r--r--tools/perf/util/dso.c15
-rw-r--r--tools/perf/util/dso.h1
-rw-r--r--tools/perf/util/dsos.c13
-rw-r--r--tools/perf/util/event.c6
-rw-r--r--tools/perf/util/event.h45
-rw-r--r--tools/perf/util/evlist.c16
-rw-r--r--tools/perf/util/evlist.h1
-rw-r--r--tools/perf/util/evsel.c31
-rw-r--r--tools/perf/util/ftrace.h1
-rw-r--r--tools/perf/util/header.c3
-rw-r--r--tools/perf/util/hist.c2
-rw-r--r--tools/perf/util/hist.h2
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.c245
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.h21
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c47
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h3
-rw-r--r--tools/perf/util/intel-pt.c164
-rw-r--r--tools/perf/util/jitdump.c10
-rw-r--r--tools/perf/util/machine.c38
-rw-r--r--tools/perf/util/machine.h8
-rw-r--r--tools/perf/util/map.c456
-rw-r--r--tools/perf/util/map.h26
-rw-r--r--tools/perf/util/maps.c403
-rw-r--r--tools/perf/util/maps.h2
-rw-r--r--tools/perf/util/mmap.c10
-rw-r--r--tools/perf/util/mmap.h3
-rw-r--r--tools/perf/util/namespaces.c50
-rw-r--r--tools/perf/util/namespaces.h10
-rw-r--r--tools/perf/util/ordered-events.c3
-rw-r--r--tools/perf/util/ordered-events.h3
-rw-r--r--tools/perf/util/pmu.c14
-rw-r--r--tools/perf/util/probe-event.c2
-rw-r--r--tools/perf/util/python.c13
-rw-r--r--tools/perf/util/record.h2
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c3
-rw-r--r--tools/perf/util/session.c213
-rw-r--r--tools/perf/util/session.h3
-rw-r--r--tools/perf/util/sort.c128
-rw-r--r--tools/perf/util/sort.h2
-rw-r--r--tools/perf/util/symbol.c18
-rw-r--r--tools/perf/util/tool.h3
-rw-r--r--tools/perf/util/top.h5
-rw-r--r--tools/perf/util/trace-event-parse.c2
-rw-r--r--tools/perf/util/util.c31
-rw-r--r--tools/perf/util/util.h2
57 files changed, 1789 insertions, 615 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 2a403cefcaf2..9a7209a99e16 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -56,6 +56,7 @@ perf-y += debug.o
perf-y += fncache.o
perf-y += machine.o
perf-y += map.o
+perf-y += maps.o
perf-y += pstack.o
perf-y += session.o
perf-y += sample-raw.o
diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN
index 59241ff342be..0ee5af529238 100755
--- a/tools/perf/util/PERF-VERSION-GEN
+++ b/tools/perf/util/PERF-VERSION-GEN
@@ -11,23 +11,18 @@ LF='
'
#
-# First check if there is a .git to get the version from git describe
-# otherwise try to get the version from the kernel Makefile
+# Always try first to get the version from the kernel Makefile
#
CID=
TAG=
if test -d ../../.git -o -f ../../.git
then
- TAG=$(git describe --abbrev=0 --match "v[0-9].[0-9]*" 2>/dev/null )
+ TAG=$(MAKEFLAGS= make -sC ../.. kernelversion)
CID=$(git log -1 --abbrev=12 --pretty=format:"%h" 2>/dev/null) && CID="-g$CID"
-elif test -f ../../PERF-VERSION-FILE
-then
+else
TAG=$(cut -d' ' -f3 ../../PERF-VERSION-FILE | sed -e 's/\"//g')
fi
-if test -z "$TAG"
-then
- TAG=$(MAKEFLAGS= make -sC ../.. kernelversion)
-fi
+
VN="$TAG$CID"
if test -n "$CID"
then
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 8190a124b99d..e4c641b240df 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -33,6 +33,7 @@
#include "string2.h"
#include "util/event.h"
#include "arch/common.h"
+#include "namespaces.h"
#include <regex.h>
#include <pthread.h>
#include <linux/bitops.h>
@@ -1696,6 +1697,15 @@ fallback:
* DSO is the same as when 'perf record' ran.
*/
__symbol__join_symfs(filename, filename_size, dso->long_name);
+
+ if (access(filename, R_OK) && errno == ENOENT && dso->nsinfo) {
+ char *new_name = filename_with_chroot(dso->nsinfo->pid,
+ filename);
+ if (new_name) {
+ strlcpy(filename, new_name, filename_size);
+ free(new_name);
+ }
+ }
}
free(build_id_path);
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 825336304a37..9e48652662d4 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -1333,6 +1333,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
synth_opts->ptwrites = true;
synth_opts->pwr_events = true;
synth_opts->other_events = true;
+ synth_opts->intr_events = true;
synth_opts->errors = true;
synth_opts->flc = true;
synth_opts->llc = true;
@@ -1479,6 +1480,9 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts,
case 'o':
synth_opts->other_events = true;
break;
+ case 'I':
+ synth_opts->intr_events = true;
+ break;
case 'e':
synth_opts->errors = true;
if (get_flags(&p, &synth_opts->error_plus_flags,
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 19910b9011f3..dc38b6f57232 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -76,6 +76,7 @@ enum itrace_period_type {
* @pwr_events: whether to synthesize power events
* @other_events: whether to synthesize other events recorded due to the use of
* aux_output
+ * @intr_events: whether to synthesize interrupt events
* @errors: whether to synthesize decoder error events
* @dont_decode: whether to skip decoding entirely
* @log: write a decoding log
@@ -120,6 +121,7 @@ struct itrace_synth_opts {
bool ptwrites;
bool pwr_events;
bool other_events;
+ bool intr_events;
bool errors;
bool dont_decode;
bool log;
@@ -636,6 +638,8 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
" p: synthesize power events\n" \
" o: synthesize other events recorded due to the use\n" \
" of aux-output (refer to perf record)\n" \
+" I: synthesize interrupt or similar (asynchronous) events\n" \
+" (e.g. Intel PT Event Trace)\n" \
" e[flags]: synthesize error events\n" \
" each flag must be preceded by + or -\n" \
" error flags are: o (overflow)\n" \
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index a517eaa51eb3..94624733af7e 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -33,6 +33,19 @@ struct btf * __weak btf__load_from_kernel_by_id(__u32 id)
return err ? ERR_PTR(err) : btf;
}
+int __weak bpf_prog_load(enum bpf_prog_type prog_type,
+ const char *prog_name __maybe_unused,
+ const char *license,
+ const struct bpf_insn *insns, size_t insn_cnt,
+ const struct bpf_prog_load_opts *opts)
+{
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+ return bpf_load_program(prog_type, insns, insn_cnt, license,
+ opts->kern_version, opts->log_buf, opts->log_size);
+#pragma GCC diagnostic pop
+}
+
struct bpf_program * __weak
bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
{
@@ -92,7 +105,7 @@ static int machine__process_bpf_event_load(struct machine *machine,
for (i = 0; i < info_linear->info.nr_jited_ksyms; i++) {
u64 *addrs = (u64 *)(uintptr_t)(info_linear->info.jited_ksyms);
u64 addr = addrs[i];
- struct map *map = maps__find(&machine->kmaps, addr);
+ struct map *map = maps__find(machine__kernel_maps(machine), addr);
if (map) {
map->dso->binary_type = DSO_BINARY_TYPE__BPF_PROG_INFO;
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index ec6d9e7b446d..b72cef1ae959 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -26,6 +26,8 @@
#include "util.h"
#include "llvm-utils.h"
#include "c++/clang-c.h"
+#include "hashmap.h"
+#include "asm/bug.h"
#include <internal/xyarray.h>
@@ -49,8 +51,54 @@ struct bpf_prog_priv {
int *type_mapping;
};
+struct bpf_perf_object {
+ struct list_head list;
+ struct bpf_object *obj;
+};
+
+static LIST_HEAD(bpf_objects_list);
+static struct hashmap *bpf_program_hash;
+static struct hashmap *bpf_map_hash;
+
+static struct bpf_perf_object *
+bpf_perf_object__next(struct bpf_perf_object *prev)
+{
+ struct bpf_perf_object *next;
+
+ if (!prev)
+ next = list_first_entry(&bpf_objects_list,
+ struct bpf_perf_object,
+ list);
+ else
+ next = list_next_entry(prev, list);
+
+ /* Empty list is noticed here so don't need checking on entry. */
+ if (&next->list == &bpf_objects_list)
+ return NULL;
+
+ return next;
+}
+
+#define bpf_perf_object__for_each(perf_obj, tmp) \
+ for ((perf_obj) = bpf_perf_object__next(NULL), \
+ (tmp) = bpf_perf_object__next(perf_obj); \
+ (perf_obj) != NULL; \
+ (perf_obj) = (tmp), (tmp) = bpf_perf_object__next(tmp))
+
static bool libbpf_initialized;
+static int bpf_perf_object__add(struct bpf_object *obj)
+{
+ struct bpf_perf_object *perf_obj = zalloc(sizeof(*perf_obj));
+
+ if (perf_obj) {
+ INIT_LIST_HEAD(&perf_obj->list);
+ perf_obj->obj = obj;
+ list_add_tail(&perf_obj->list, &bpf_objects_list);
+ }
+ return perf_obj ? 0 : -ENOMEM;
+}
+
struct bpf_object *
bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name)
{
@@ -68,9 +116,21 @@ bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name)
return ERR_PTR(-EINVAL);
}
+ if (bpf_perf_object__add(obj)) {
+ bpf_object__close(obj);
+ return ERR_PTR(-ENOMEM);
+ }
+
return obj;
}
+static void bpf_perf_object__close(struct bpf_perf_object *perf_obj)
+{
+ list_del(&perf_obj->list);
+ bpf_object__close(perf_obj->obj);
+ free(perf_obj);
+}
+
struct bpf_object *bpf__prepare_load(const char *filename, bool source)
{
LIBBPF_OPTS(bpf_object_open_opts, opts, .object_name = filename);
@@ -102,29 +162,25 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source)
llvm__dump_obj(filename, obj_buf, obj_buf_sz);
free(obj_buf);
- } else
+ } else {
obj = bpf_object__open(filename);
+ }
if (IS_ERR_OR_NULL(obj)) {
pr_debug("bpf: failed to load %s\n", filename);
return obj;
}
- return obj;
-}
-
-void bpf__clear(void)
-{
- struct bpf_object *obj, *tmp;
-
- bpf_object__for_each_safe(obj, tmp) {
- bpf__unprobe(obj);
+ if (bpf_perf_object__add(obj)) {
bpf_object__close(obj);
+ return ERR_PTR(-BPF_LOADER_ERRNO__COMPILE);
}
+
+ return obj;
}
static void
-clear_prog_priv(struct bpf_program *prog __maybe_unused,
+clear_prog_priv(const struct bpf_program *prog __maybe_unused,
void *_priv)
{
struct bpf_prog_priv *priv = _priv;
@@ -137,6 +193,83 @@ clear_prog_priv(struct bpf_program *prog __maybe_unused,
free(priv);
}
+static void bpf_program_hash_free(void)
+{
+ struct hashmap_entry *cur;
+ size_t bkt;
+
+ if (IS_ERR_OR_NULL(bpf_program_hash))
+ return;
+
+ hashmap__for_each_entry(bpf_program_hash, cur, bkt)
+ clear_prog_priv(cur->key, cur->value);
+
+ hashmap__free(bpf_program_hash);
+ bpf_program_hash = NULL;
+}
+
+static void bpf_map_hash_free(void);
+
+void bpf__clear(void)
+{
+ struct bpf_perf_object *perf_obj, *tmp;
+
+ bpf_perf_object__for_each(perf_obj, tmp) {
+ bpf__unprobe(perf_obj->obj);
+ bpf_perf_object__close(perf_obj);
+ }
+
+ bpf_program_hash_free();
+ bpf_map_hash_free();
+}
+
+static size_t ptr_hash(const void *__key, void *ctx __maybe_unused)
+{
+ return (size_t) __key;
+}
+
+static bool ptr_equal(const void *key1, const void *key2,
+ void *ctx __maybe_unused)
+{
+ return key1 == key2;
+}
+
+static void *program_priv(const struct bpf_program *prog)
+{
+ void *priv;
+
+ if (IS_ERR_OR_NULL(bpf_program_hash))
+ return NULL;
+ if (!hashmap__find(bpf_program_hash, prog, &priv))
+ return NULL;
+ return priv;
+}
+
+static int program_set_priv(struct bpf_program *prog, void *priv)
+{
+ void *old_priv;
+
+ /*
+ * Should not happen, we warn about it in the
+ * caller function - config_bpf_program
+ */
+ if (IS_ERR(bpf_program_hash))
+ return PTR_ERR(bpf_program_hash);
+
+ if (!bpf_program_hash) {
+ bpf_program_hash = hashmap__new(ptr_hash, ptr_equal, NULL);
+ if (IS_ERR(bpf_program_hash))
+ return PTR_ERR(bpf_program_hash);
+ }
+
+ old_priv = program_priv(prog);
+ if (old_priv) {
+ clear_prog_priv(prog, old_priv);
+ return hashmap__set(bpf_program_hash, prog, priv, NULL, NULL);
+ }
+ return hashmap__add(bpf_program_hash, prog, priv);
+}
+
static int
prog_config__exec(const char *value, struct perf_probe_event *pev)
{
@@ -378,7 +511,7 @@ config_bpf_program(struct bpf_program *prog)
pr_debug("bpf: config '%s' is ok\n", config_str);
set_priv:
- err = bpf_program__set_priv(prog, priv, clear_prog_priv);
+ err = program_set_priv(prog, priv);
if (err) {
pr_debug("Failed to set priv for program '%s'\n", config_str);
goto errout;
@@ -419,7 +552,7 @@ preproc_gen_prologue(struct bpf_program *prog, int n,
struct bpf_insn *orig_insns, int orig_insns_cnt,
struct bpf_prog_prep_result *res)
{
- struct bpf_prog_priv *priv = bpf_program__priv(prog);
+ struct bpf_prog_priv *priv = program_priv(prog);
struct probe_trace_event *tev;
struct perf_probe_event *pev;
struct bpf_insn *buf;
@@ -570,7 +703,7 @@ static int map_prologue(struct perf_probe_event *pev, int *mapping,
static int hook_load_preprocessor(struct bpf_program *prog)
{
- struct bpf_prog_priv *priv = bpf_program__priv(prog);
+ struct bpf_prog_priv *priv = program_priv(prog);
struct perf_probe_event *pev;
bool need_prologue = false;
int err, i;
@@ -646,7 +779,7 @@ int bpf__probe(struct bpf_object *obj)
if (err)
goto out;
- priv = bpf_program__priv(prog);
+ priv = program_priv(prog);
if (IS_ERR_OR_NULL(priv)) {
if (!priv)
err = -BPF_LOADER_ERRNO__INTERNAL;
@@ -698,7 +831,7 @@ int bpf__unprobe(struct bpf_object *obj)
struct bpf_program *prog;
bpf_object__for_each_program(prog, obj) {
- struct bpf_prog_priv *priv = bpf_program__priv(prog);
+ struct bpf_prog_priv *priv = program_priv(prog);
int i;
if (IS_ERR_OR_NULL(priv) || priv->is_tp)
@@ -754,7 +887,7 @@ int bpf__foreach_event(struct bpf_object *obj,
int err;
bpf_object__for_each_program(prog, obj) {
- struct bpf_prog_priv *priv = bpf_program__priv(prog);
+ struct bpf_prog_priv *priv = program_priv(prog);
struct probe_trace_event *tev;
struct perf_probe_event *pev;
int i, fd;
@@ -850,7 +983,7 @@ bpf_map_priv__purge(struct bpf_map_priv *priv)
}
static void
-bpf_map_priv__clear(struct bpf_map *map __maybe_unused,
+bpf_map_priv__clear(const struct bpf_map *map __maybe_unused,
void *_priv)
{
struct bpf_map_priv *priv = _priv;
@@ -859,6 +992,53 @@ bpf_map_priv__clear(struct bpf_map *map __maybe_unused,
free(priv);
}
+static void *map_priv(const struct bpf_map *map)
+{
+ void *priv;
+
+ if (IS_ERR_OR_NULL(bpf_map_hash))
+ return NULL;
+ if (!hashmap__find(bpf_map_hash, map, &priv))
+ return NULL;
+ return priv;
+}
+
+static void bpf_map_hash_free(void)
+{
+ struct hashmap_entry *cur;
+ size_t bkt;
+
+ if (IS_ERR_OR_NULL(bpf_map_hash))
+ return;
+
+ hashmap__for_each_entry(bpf_map_hash, cur, bkt)
+ bpf_map_priv__clear(cur->key, cur->value);
+
+ hashmap__free(bpf_map_hash);
+ bpf_map_hash = NULL;
+}
+
+static int map_set_priv(struct bpf_map *map, void *priv)
+{
+ void *old_priv;
+
+ if (WARN_ON_ONCE(IS_ERR(bpf_map_hash)))
+ return PTR_ERR(bpf_program_hash);
+
+ if (!bpf_map_hash) {
+ bpf_map_hash = hashmap__new(ptr_hash, ptr_equal, NULL);
+ if (IS_ERR(bpf_map_hash))
+ return PTR_ERR(bpf_map_hash);
+ }
+
+ old_priv = map_priv(map);
+ if (old_priv) {
+ bpf_map_priv__clear(map, old_priv);
+ return hashmap__set(bpf_map_hash, map, priv, NULL, NULL);
+ }
+ return hashmap__add(bpf_map_hash, map, priv);
+}
+
static int
bpf_map_op_setkey(struct bpf_map_op *op, struct parse_events_term *term)
{
@@ -958,7 +1138,7 @@ static int
bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op)
{
const char *map_name = bpf_map__name(map);
- struct bpf_map_priv *priv = bpf_map__priv(map);
+ struct bpf_map_priv *priv = map_priv(map);
if (IS_ERR(priv)) {
pr_debug("Failed to get private from map %s\n", map_name);
@@ -973,7 +1153,7 @@ bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op)
}
INIT_LIST_HEAD(&priv->ops_list);
- if (bpf_map__set_priv(map, priv, bpf_map_priv__clear)) {
+ if (map_set_priv(map, priv)) {
free(priv);
return -BPF_LOADER_ERRNO__INTERNAL;
}
@@ -1305,7 +1485,7 @@ bpf_map_config_foreach_key(struct bpf_map *map,
int err, map_fd, type;
struct bpf_map_op *op;
const char *name = bpf_map__name(map);
- struct bpf_map_priv *priv = bpf_map__priv(map);
+ struct bpf_map_priv *priv = map_priv(map);
if (IS_ERR(priv)) {
pr_debug("ERROR: failed to get private from map %s\n", name);
@@ -1494,11 +1674,11 @@ apply_obj_config_object(struct bpf_object *obj)
int bpf__apply_obj_config(void)
{
- struct bpf_object *obj, *tmp;
+ struct bpf_perf_object *perf_obj, *tmp;
int err;
- bpf_object__for_each_safe(obj, tmp) {
- err = apply_obj_config_object(obj);
+ bpf_perf_object__for_each(perf_obj, tmp) {
+ err = apply_obj_config_object(perf_obj->obj);
if (err)
return err;
}
@@ -1506,27 +1686,25 @@ int bpf__apply_obj_config(void)
return 0;
}
-#define bpf__for_each_map(pos, obj, objtmp) \
- bpf_object__for_each_safe(obj, objtmp) \
- bpf_object__for_each_map(pos, obj)
+#define bpf__perf_for_each_map(map, pobj, tmp) \
+ bpf_perf_object__for_each(pobj, tmp) \
+ bpf_object__for_each_map(map, pobj->obj)
-#define bpf__for_each_map_named(pos, obj, objtmp, name) \
- bpf__for_each_map(pos, obj, objtmp) \
- if (bpf_map__name(pos) && \
- (strcmp(name, \
- bpf_map__name(pos)) == 0))
+#define bpf__perf_for_each_map_named(map, pobj, pobjtmp, name) \
+ bpf__perf_for_each_map(map, pobj, pobjtmp) \
+ if (bpf_map__name(map) && (strcmp(name, bpf_map__name(map)) == 0))
struct evsel *bpf__setup_output_event(struct evlist *evlist, const char *name)
{
struct bpf_map_priv *tmpl_priv = NULL;
- struct bpf_object *obj, *tmp;
+ struct bpf_perf_object *perf_obj, *tmp;
struct evsel *evsel = NULL;
struct bpf_map *map;
int err;
bool need_init = false;
- bpf__for_each_map_named(map, obj, tmp, name) {
- struct bpf_map_priv *priv = bpf_map__priv(map);
+ bpf__perf_for_each_map_named(map, perf_obj, tmp, name) {
+ struct bpf_map_priv *priv = map_priv(map);
if (IS_ERR(priv))
return ERR_PTR(-BPF_LOADER_ERRNO__INTERNAL);
@@ -1561,8 +1739,8 @@ struct evsel *bpf__setup_output_event(struct evlist *evlist, const char *name)
evsel = evlist__last(evlist);
}
- bpf__for_each_map_named(map, obj, tmp, name) {
- struct bpf_map_priv *priv = bpf_map__priv(map);
+ bpf__perf_for_each_map_named(map, perf_obj, tmp, name) {
+ struct bpf_map_priv *priv = map_priv(map);
if (IS_ERR(priv))
return ERR_PTR(-BPF_LOADER_ERRNO__INTERNAL);
@@ -1574,7 +1752,7 @@ struct evsel *bpf__setup_output_event(struct evlist *evlist, const char *name)
if (!priv)
return ERR_PTR(-ENOMEM);
- err = bpf_map__set_priv(map, priv, bpf_map_priv__clear);
+ err = map_set_priv(map, priv);
if (err) {
bpf_map_priv__clear(map, priv);
return ERR_PTR(err);
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
index d756cc66eef3..4f4d3aaff37c 100644
--- a/tools/perf/util/bpf_ftrace.c
+++ b/tools/perf/util/bpf_ftrace.c
@@ -81,6 +81,8 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
}
}
+ skel->bss->use_nsec = ftrace->use_nsec;
+
skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
false, func->name);
if (IS_ERR(skel->links.func_begin)) {
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
index ea94187fe443..9d01e3af7479 100644
--- a/tools/perf/util/bpf_skel/func_latency.bpf.c
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -39,6 +39,7 @@ struct {
int enabled = 0;
int has_cpu = 0;
int has_task = 0;
+int use_nsec = 0;
SEC("kprobe/func")
int BPF_PROG(func_begin)
@@ -80,6 +81,7 @@ int BPF_PROG(func_end)
{
__u64 tid;
__u64 *start;
+ __u64 cmp_base = use_nsec ? 1 : 1000;
if (!enabled)
return 0;
@@ -97,9 +99,9 @@ int BPF_PROG(func_end)
if (delta < 0)
return 0;
- // calculate index using delta in usec
+ // calculate index using delta
for (key = 0; key < (NUM_BUCKET - 1); key++) {
- if (delta < ((1000UL) << key))
+ if (delta < (cmp_base << key))
break;
}
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index e32e8f2ff3bd..82f3d46bea70 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -706,7 +706,7 @@ build_id_cache__add(const char *sbuild_id, const char *name, const char *realnam
if (is_kallsyms) {
if (copyfile("/proc/kallsyms", filename))
goto out_free;
- } else if (nsi && nsi->need_setns) {
+ } else if (nsi && nsinfo__need_setns(nsi)) {
if (copyfile_ns(name, filename, nsi))
goto out_free;
} else if (link(realname, filename) && errno != EEXIST &&
@@ -730,7 +730,7 @@ build_id_cache__add(const char *sbuild_id, const char *name, const char *realnam
goto out_free;
}
if (access(filename, F_OK)) {
- if (nsi && nsi->need_setns) {
+ if (nsi && nsinfo__need_setns(nsi)) {
if (copyfile_ns(debugfile, filename,
nsi))
goto out_free;
@@ -762,7 +762,7 @@ build_id_cache__add(const char *sbuild_id, const char *name, const char *realnam
len = readlink(linkname, path, sizeof(path) - 1);
if (len <= 0) {
- pr_err("Cant read link: %s\n", linkname);
+ pr_err("Can't read link: %s\n", linkname);
goto out_free;
}
path[len] = '\0';
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 131207b91d15..5c27a4b2e7a7 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -1119,7 +1119,7 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *
goto out;
}
- if (al->maps == &al->maps->machine->kmaps) {
+ if (al->maps == machine__kernel_maps(al->maps->machine)) {
if (machine__is_host(al->maps->machine)) {
al->cpumode = PERF_RECORD_MISC_KERNEL;
al->level = 'k';
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
index 15a4547d608e..a5ace2bbc28d 100644
--- a/tools/perf/util/data.c
+++ b/tools/perf/util/data.c
@@ -48,12 +48,16 @@ int perf_data__create_dir(struct perf_data *data, int nr)
struct perf_data_file *file = &files[i];
ret = asprintf(&file->path, "%s/data.%d", data->path, i);
- if (ret < 0)
+ if (ret < 0) {
+ ret = -ENOMEM;
goto out_err;
+ }
ret = open(file->path, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR);
- if (ret < 0)
+ if (ret < 0) {
+ ret = -errno;
goto out_err;
+ }
file->fd = ret;
}
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 9cc8a1772b4b..5ac13958d1bd 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -508,8 +508,19 @@ static int __open_dso(struct dso *dso, struct machine *machine)
root_dir, name, PATH_MAX))
goto out;
- if (!is_regular_file(name))
- goto out;
+ if (!is_regular_file(name)) {
+ char *new_name;
+
+ if (errno != ENOENT || dso->nsinfo == NULL)
+ goto out;
+
+ new_name = filename_with_chroot(dso->nsinfo->pid, name);
+ if (!new_name)
+ goto out;
+
+ free(name);
+ name = new_name;
+ }
if (dso__needs_decompress(dso)) {
char newpath[KMOD_DECOMP_LEN];
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 011da3924fc1..3a9fd4d389b5 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -167,6 +167,7 @@ struct dso {
enum dso_load_errno load_errno;
u8 adjust_symbols:1;
u8 has_build_id:1;
+ u8 header_build_id:1;
u8 has_srcline:1;
u8 hit:1;
u8 annotate_warned:1;
diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c
index 183a81d5b2f9..b97366f77bbf 100644
--- a/tools/perf/util/dsos.c
+++ b/tools/perf/util/dsos.c
@@ -2,12 +2,15 @@
#include "debug.h"
#include "dsos.h"
#include "dso.h"
+#include "util.h"
#include "vdso.h"
#include "namespaces.h"
+#include <errno.h>
#include <libgen.h>
#include <stdlib.h>
#include <string.h>
#include <symbol.h> // filename__read_build_id
+#include <unistd.h>
static int __dso_id__cmp(struct dso_id *a, struct dso_id *b)
{
@@ -76,6 +79,16 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
if (filename__read_build_id(pos->long_name, &pos->bid) > 0) {
have_build_id = true;
pos->has_build_id = true;
+ } else if (errno == ENOENT && pos->nsinfo) {
+ char *new_name = filename_with_chroot(pos->nsinfo->pid,
+ pos->long_name);
+
+ if (new_name && filename__read_build_id(new_name,
+ &pos->bid) > 0) {
+ have_build_id = true;
+ pos->has_build_id = true;
+ }
+ free(new_name);
}
nsinfo__mountns_exit(&nsc);
}
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index fe24801f8e9f..6439c888ae38 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -484,7 +484,7 @@ size_t perf_event__fprintf_text_poke(union perf_event *event, struct machine *ma
if (machine) {
struct addr_location al;
- al.map = maps__find(&machine->kmaps, tp->addr);
+ al.map = maps__find(machine__kernel_maps(machine), tp->addr);
if (al.map && map__load(al.map) >= 0) {
al.addr = al.map->map_ip(al.map, tp->addr);
al.sym = map__find_symbol(al.map, al.addr);
@@ -587,13 +587,13 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr,
if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) {
al->level = 'k';
- al->maps = maps = &machine->kmaps;
+ al->maps = maps = machine__kernel_maps(machine);
load_map = true;
} else if (cpumode == PERF_RECORD_MISC_USER && perf_host) {
al->level = '.';
} else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest) {
al->level = 'g';
- al->maps = maps = &machine->kmaps;
+ al->maps = maps = machine__kernel_maps(machine);
load_map = true;
} else if (cpumode == PERF_RECORD_MISC_GUEST_USER && perf_guest) {
al->level = 'u';
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index c59331eea1d9..cdd72e05fd28 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -101,9 +101,11 @@ enum {
PERF_IP_FLAG_IN_TX = 1ULL << 10,
PERF_IP_FLAG_VMENTRY = 1ULL << 11,
PERF_IP_FLAG_VMEXIT = 1ULL << 12,
+ PERF_IP_FLAG_INTR_DISABLE = 1ULL << 13,
+ PERF_IP_FLAG_INTR_TOGGLE = 1ULL << 14,
};
-#define PERF_IP_FLAG_CHARS "bcrosyiABExgh"
+#define PERF_IP_FLAG_CHARS "bcrosyiABExghDt"
#define PERF_BRANCH_MASK (\
PERF_IP_FLAG_BRANCH |\
@@ -182,6 +184,8 @@ enum perf_synth_id {
PERF_SYNTH_INTEL_PWRX,
PERF_SYNTH_INTEL_CBR,
PERF_SYNTH_INTEL_PSB,
+ PERF_SYNTH_INTEL_EVT,
+ PERF_SYNTH_INTEL_IFLAG_CHG,
};
/*
@@ -280,6 +284,45 @@ struct perf_synth_intel_psb {
u64 offset;
};
+struct perf_synth_intel_evd {
+ union {
+ struct {
+ u8 evd_type;
+ u8 reserved[7];
+ };
+ u64 et;
+ };
+ u64 payload;
+};
+
+/* Intel PT Event Trace */
+struct perf_synth_intel_evt {
+ u32 padding;
+ union {
+ struct {
+ u32 type : 5,
+ reserved : 2,
+ ip : 1,
+ vector : 8,
+ evd_cnt : 16;
+ };
+ u32 cfe;
+ };
+ struct perf_synth_intel_evd evd[0];
+};
+
+struct perf_synth_intel_iflag_chg {
+ u32 padding;
+ union {
+ struct {
+ u32 iflag : 1,
+ via_branch : 1;
+ };
+ u32 flags;
+ };
+ u64 branch_ip; /* If via_branch */
+};
+
/*
* raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get
* 8-byte alignment.
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 41a66a48cbdf..9bb79e049957 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -2145,6 +2145,22 @@ int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd)
return err;
}
+int evlist__ctlfd_update(struct evlist *evlist, struct pollfd *update)
+{
+ int ctlfd_pos = evlist->ctl_fd.pos;
+ struct pollfd *entries = evlist->core.pollfd.entries;
+
+ if (!evlist__ctlfd_initialized(evlist))
+ return 0;
+
+ if (entries[ctlfd_pos].fd != update->fd ||
+ entries[ctlfd_pos].events != update->events)
+ return -1;
+
+ entries[ctlfd_pos].revents = update->revents;
+ return 0;
+}
+
struct evsel *evlist__find_evsel(struct evlist *evlist, int idx)
{
struct evsel *evsel;
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 64cba56fbc74..a21daaa5fc1b 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -410,6 +410,7 @@ void evlist__close_control(int ctl_fd, int ctl_fd_ack, bool *ctl_fd_close);
int evlist__initialize_ctlfd(struct evlist *evlist, int ctl_fd, int ctl_fd_ack);
int evlist__finalize_ctlfd(struct evlist *evlist);
bool evlist__ctlfd_initialized(struct evlist *evlist);
+int evlist__ctlfd_update(struct evlist *evlist, struct pollfd *update);
int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd);
int evlist__ctlfd_ack(struct evlist *evlist);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 22d3267ce294..2a1729e7aee4 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -2847,9 +2847,23 @@ static bool find_process(const char *name)
return ret ? false : true;
}
+static bool is_amd(const char *arch, const char *cpuid)
+{
+ return arch && !strcmp("x86", arch) && cpuid && strstarts(cpuid, "AuthenticAMD");
+}
+
+static bool is_amd_ibs(struct evsel *evsel)
+{
+ return evsel->core.attr.precise_ip
+ || (evsel->pmu_name && !strncmp(evsel->pmu_name, "ibs", 3));
+}
+
int evsel__open_strerror(struct evsel *evsel, struct target *target,
int err, char *msg, size_t size)
{
+ struct perf_env *env = evsel__env(evsel);
+ const char *arch = perf_env__arch(env);
+ const char *cpuid = perf_env__cpuid(env);
char sbuf[STRERR_BUFSIZE];
int printed = 0, enforced = 0;
@@ -2909,6 +2923,10 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
"No such device - did you specify an out-of-range profile CPU?");
break;
case EOPNOTSUPP:
+ if (evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK)
+ return scnprintf(msg, size,
+ "%s: PMU Hardware or event type doesn't support branch stack sampling.",
+ evsel__name(evsel));
if (evsel->core.attr.aux_output)
return scnprintf(msg, size,
"%s: PMU Hardware doesn't support 'aux_output' feature",
@@ -2949,6 +2967,17 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
return scnprintf(msg, size,
"Invalid event (%s) in per-thread mode, enable system wide with '-a'.",
evsel__name(evsel));
+ if (is_amd(arch, cpuid)) {
+ if (is_amd_ibs(evsel)) {
+ if (evsel->core.attr.exclude_kernel)
+ return scnprintf(msg, size,
+ "AMD IBS can't exclude kernel events. Try running at a higher privilege level.");
+ if (!evsel->core.system_wide)
+ return scnprintf(msg, size,
+ "AMD IBS may only be available in system-wide/per-cpu mode. Try using -a, or -C and workload affinity");
+ }
+ }
+
break;
case ENODATA:
return scnprintf(msg, size, "Cannot collect data source with the load latency event alone. "
@@ -2965,7 +2994,7 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
struct perf_env *evsel__env(struct evsel *evsel)
{
- if (evsel && evsel->evlist)
+ if (evsel && evsel->evlist && evsel->evlist->env)
return evsel->evlist->env;
return &perf_env;
}
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
index 887f68a185f7..a34cd15733b8 100644
--- a/tools/perf/util/ftrace.h
+++ b/tools/perf/util/ftrace.h
@@ -17,6 +17,7 @@ struct perf_ftrace {
struct list_head nograph_funcs;
unsigned long percpu_buffer_size;
bool inherit;
+ bool use_nsec;
int graph_depth;
int func_stack_trace;
int func_irq_info;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 6da12e522edc..d546ff724dbe 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1335,7 +1335,7 @@ static int build_mem_topology(struct memory_node *nodes, u64 size, u64 *cntp)
dir = opendir(path);
if (!dir) {
- pr_debug2("%s: could't read %s, does this arch have topology information?\n",
+ pr_debug2("%s: couldn't read %s, does this arch have topology information?\n",
__func__, path);
return -1;
}
@@ -2200,6 +2200,7 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev,
build_id__init(&bid, bev->data, size);
dso__set_build_id(dso, &bid);
+ dso->header_build_id = 1;
if (dso_space != DSO_SPACE__USER) {
struct kmod_path m = { .name = NULL, };
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 0a8033b09e28..1c085ab56534 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -124,6 +124,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
} else {
symlen = unresolved_col_width + 4 + 2;
hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen);
+ hists__new_col_len(hists, HISTC_ADDR_FROM, symlen);
hists__set_unres_dso_col_len(hists, HISTC_DSO_FROM);
}
@@ -138,6 +139,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
} else {
symlen = unresolved_col_width + 4 + 2;
hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen);
+ hists__new_col_len(hists, HISTC_ADDR_TO, symlen);
hists__set_unres_dso_col_len(hists, HISTC_DSO_TO);
}
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 2a15e22fb89c..7ed4648d2fc2 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -77,6 +77,8 @@ enum hist_column {
HISTC_GLOBAL_INS_LAT,
HISTC_LOCAL_P_STAGE_CYC,
HISTC_GLOBAL_P_STAGE_CYC,
+ HISTC_ADDR_FROM,
+ HISTC_ADDR_TO,
HISTC_NR_COLS, /* Last entry */
};
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 0e013c2d9eb4..e1d8f7504cbe 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -144,6 +144,8 @@ struct intel_pt_decoder {
bool vm_tm_corr_continuous;
bool nr;
bool next_nr;
+ bool iflag;
+ bool next_iflag;
enum intel_pt_param_flags flags;
uint64_t pos;
uint64_t last_ip;
@@ -213,6 +215,9 @@ struct intel_pt_decoder {
bool set_fup_pwre;
bool set_fup_exstop;
bool set_fup_bep;
+ bool set_fup_cfe_ip;
+ bool set_fup_cfe;
+ bool set_fup_mode_exec;
bool sample_cyc;
unsigned int fup_tx_flags;
unsigned int tx_flags;
@@ -223,6 +228,7 @@ struct intel_pt_decoder {
uint64_t timestamp_insn_cnt;
uint64_t sample_insn_cnt;
uint64_t stuck_ip;
+ struct intel_pt_pkt fup_cfe_pkt;
int max_loops;
int no_progress;
int stuck_ip_prd;
@@ -231,6 +237,8 @@ struct intel_pt_decoder {
const unsigned char *next_buf;
size_t next_len;
unsigned char temp_buf[INTEL_PT_PKT_MAX_SZ];
+ int evd_cnt;
+ struct intel_pt_evd evd[INTEL_PT_MAX_EVDS];
};
static uint64_t intel_pt_lower_power_of_2(uint64_t x)
@@ -820,6 +828,9 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
case INTEL_PT_BIP:
case INTEL_PT_BEP:
case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
return 0;
case INTEL_PT_MTC:
@@ -1094,6 +1105,52 @@ static void intel_pt_sample_insn(struct intel_pt_decoder *decoder)
decoder->state.type |= INTEL_PT_INSTRUCTION;
}
+/*
+ * Sample FUP instruction at the same time as reporting the FUP event, so the
+ * instruction sample gets the same flags as the FUP event.
+ */
+static void intel_pt_sample_fup_insn(struct intel_pt_decoder *decoder)
+{
+ struct intel_pt_insn intel_pt_insn;
+ uint64_t max_insn_cnt, insn_cnt = 0;
+ int err;
+
+ decoder->state.insn_op = INTEL_PT_OP_OTHER;
+ decoder->state.insn_len = 0;
+
+ if (!decoder->branch_enable || !decoder->pge || decoder->hop ||
+ decoder->ip != decoder->last_ip)
+ return;
+
+ if (!decoder->mtc_insn)
+ decoder->mtc_insn = true;
+
+ max_insn_cnt = intel_pt_next_sample(decoder);
+ if (max_insn_cnt != 1)
+ return;
+
+ err = decoder->walk_insn(&intel_pt_insn, &insn_cnt, &decoder->ip,
+ 0, max_insn_cnt, decoder->data);
+ /* Ignore error, it will be reported next walk anyway */
+ if (err)
+ return;
+
+ if (intel_pt_insn.branch != INTEL_PT_BR_NO_BRANCH) {
+ intel_pt_log_at("ERROR: Unexpected branch at FUP instruction", decoder->ip);
+ return;
+ }
+
+ decoder->tot_insn_cnt += insn_cnt;
+ decoder->timestamp_insn_cnt += insn_cnt;
+ decoder->sample_insn_cnt += insn_cnt;
+ decoder->period_insn_cnt += insn_cnt;
+
+ intel_pt_sample_insn(decoder);
+
+ decoder->state.type |= INTEL_PT_INSTRUCTION;
+ decoder->ip += intel_pt_insn.length;
+}
+
static int intel_pt_walk_insn(struct intel_pt_decoder *decoder,
struct intel_pt_insn *intel_pt_insn, uint64_t ip)
{
@@ -1203,13 +1260,85 @@ out_no_progress:
return err;
}
-static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
+static void intel_pt_mode_exec_status(struct intel_pt_decoder *decoder)
+{
+ bool iflag = decoder->packet.count & INTEL_PT_IFLAG;
+
+ decoder->exec_mode = decoder->packet.payload;
+ decoder->iflag = iflag;
+ decoder->next_iflag = iflag;
+ decoder->state.from_iflag = iflag;
+ decoder->state.to_iflag = iflag;
+}
+
+static void intel_pt_mode_exec(struct intel_pt_decoder *decoder)
+{
+ bool iflag = decoder->packet.count & INTEL_PT_IFLAG;
+
+ decoder->exec_mode = decoder->packet.payload;
+ decoder->next_iflag = iflag;
+}
+
+static void intel_pt_sample_iflag(struct intel_pt_decoder *decoder)
+{
+ decoder->state.type |= INTEL_PT_IFLAG_CHG;
+ decoder->state.from_iflag = decoder->iflag;
+ decoder->state.to_iflag = decoder->next_iflag;
+ decoder->iflag = decoder->next_iflag;
+}
+
+static void intel_pt_sample_iflag_chg(struct intel_pt_decoder *decoder)
+{
+ if (decoder->iflag != decoder->next_iflag)
+ intel_pt_sample_iflag(decoder);
+}
+
+static void intel_pt_clear_fup_event(struct intel_pt_decoder *decoder)
+{
+ decoder->set_fup_tx_flags = false;
+ decoder->set_fup_ptw = false;
+ decoder->set_fup_mwait = false;
+ decoder->set_fup_pwre = false;
+ decoder->set_fup_exstop = false;
+ decoder->set_fup_bep = false;
+ decoder->set_fup_cfe_ip = false;
+ decoder->set_fup_cfe = false;
+ decoder->evd_cnt = 0;
+ decoder->set_fup_mode_exec = false;
+ decoder->iflag = decoder->next_iflag;
+}
+
+static bool intel_pt_fup_event(struct intel_pt_decoder *decoder, bool no_tip)
{
enum intel_pt_sample_type type = decoder->state.type;
+ bool sample_fup_insn = false;
bool ret = false;
decoder->state.type &= ~INTEL_PT_BRANCH;
+ if (decoder->set_fup_cfe_ip || decoder->set_fup_cfe) {
+ bool ip = decoder->set_fup_cfe_ip;
+
+ decoder->set_fup_cfe_ip = false;
+ decoder->set_fup_cfe = false;
+ decoder->state.type |= INTEL_PT_EVT;
+ if (!ip && decoder->pge)
+ decoder->state.type |= INTEL_PT_BRANCH;
+ decoder->state.cfe_type = decoder->fup_cfe_pkt.count;
+ decoder->state.cfe_vector = decoder->fup_cfe_pkt.payload;
+ decoder->state.evd_cnt = decoder->evd_cnt;
+ decoder->state.evd = decoder->evd;
+ decoder->evd_cnt = 0;
+ if (ip || decoder->pge)
+ decoder->state.flags |= INTEL_PT_FUP_IP;
+ ret = true;
+ }
+ if (decoder->set_fup_mode_exec) {
+ decoder->set_fup_mode_exec = false;
+ intel_pt_sample_iflag(decoder);
+ sample_fup_insn = no_tip;
+ ret = true;
+ }
if (decoder->set_fup_tx_flags) {
decoder->set_fup_tx_flags = false;
decoder->tx_flags = decoder->fup_tx_flags;
@@ -1266,6 +1395,8 @@ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
if (ret) {
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
+ if (sample_fup_insn)
+ intel_pt_sample_fup_insn(decoder);
} else {
decoder->state.type = type;
}
@@ -1298,7 +1429,7 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
bool no_tip = decoder->pkt_state != INTEL_PT_STATE_FUP;
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
- if (intel_pt_fup_event(decoder) && no_tip)
+ if (intel_pt_fup_event(decoder, no_tip) && no_tip)
return 0;
return -EAGAIN;
}
@@ -1350,6 +1481,7 @@ static int intel_pt_walk_tip(struct intel_pt_decoder *decoder)
return err;
intel_pt_update_nr(decoder);
+ intel_pt_sample_iflag_chg(decoder);
if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
if (decoder->pkt_state == INTEL_PT_STATE_TIP_PGD) {
@@ -1464,6 +1596,7 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
decoder->state.to_ip = decoder->last_ip;
decoder->ip = decoder->last_ip;
intel_pt_update_nr(decoder);
+ intel_pt_sample_iflag_chg(decoder);
return 0;
}
@@ -1527,6 +1660,19 @@ static int intel_pt_mode_tsx(struct intel_pt_decoder *decoder, bool *no_tip)
return 0;
}
+static int intel_pt_evd(struct intel_pt_decoder *decoder)
+{
+ if (decoder->evd_cnt >= INTEL_PT_MAX_EVDS) {
+ intel_pt_log_at("ERROR: Too many EVD packets", decoder->pos);
+ return -ENOSYS;
+ }
+ decoder->evd[decoder->evd_cnt++] = (struct intel_pt_evd){
+ .type = decoder->packet.count,
+ .payload = decoder->packet.payload,
+ };
+ return 0;
+}
+
static uint64_t intel_pt_8b_tsc(uint64_t timestamp, uint64_t ref_timestamp)
{
timestamp |= (ref_timestamp & (0xffULL << 56));
@@ -1620,12 +1766,7 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder)
decoder->state.from_ip = decoder->ip;
decoder->ip = 0;
decoder->pge = false;
- decoder->set_fup_tx_flags = false;
- decoder->set_fup_ptw = false;
- decoder->set_fup_mwait = false;
- decoder->set_fup_pwre = false;
- decoder->set_fup_exstop = false;
- decoder->set_fup_bep = false;
+ intel_pt_clear_fup_event(decoder);
decoder->overflow = true;
return -EOVERFLOW;
}
@@ -1873,6 +2014,9 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
case INTEL_PT_BIP:
case INTEL_PT_BEP:
case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
decoder->have_tma = false;
intel_pt_log("ERROR: Unexpected packet\n");
err = -EAGAIN;
@@ -1895,7 +2039,7 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
break;
case INTEL_PT_MODE_EXEC:
- decoder->exec_mode = decoder->packet.payload;
+ intel_pt_mode_exec_status(decoder);
break;
case INTEL_PT_PIP:
@@ -1975,6 +2119,9 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
case INTEL_PT_BIP:
case INTEL_PT_BEP:
case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
intel_pt_log("ERROR: Missing TIP after FUP\n");
decoder->pkt_state = INTEL_PT_STATE_ERR3;
decoder->pkt_step = 0;
@@ -2026,6 +2173,7 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
decoder->state.to_ip = decoder->ip;
}
intel_pt_update_nr(decoder);
+ intel_pt_sample_iflag_chg(decoder);
return 0;
case INTEL_PT_PIP:
@@ -2043,7 +2191,7 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
break;
case INTEL_PT_MODE_EXEC:
- decoder->exec_mode = decoder->packet.payload;
+ intel_pt_mode_exec(decoder);
break;
case INTEL_PT_VMCS:
@@ -2134,6 +2282,9 @@ static int intel_pt_vm_psb_lookahead_cb(struct intel_pt_pkt_info *pkt_info)
case INTEL_PT_TIP:
case INTEL_PT_PSB:
case INTEL_PT_TRACESTOP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
default:
return 1;
}
@@ -2653,6 +2804,9 @@ static int intel_pt_vm_time_correlation(struct intel_pt_decoder *decoder)
decoder->blk_type = 0;
break;
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
case INTEL_PT_MODE_EXEC:
case INTEL_PT_MODE_TSX:
case INTEL_PT_MNT:
@@ -2719,6 +2873,7 @@ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, in
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
intel_pt_update_nr(decoder);
+ intel_pt_sample_iflag_chg(decoder);
return HOP_RETURN;
case INTEL_PT_FUP:
@@ -2733,10 +2888,10 @@ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, in
decoder->state.type = INTEL_PT_INSTRUCTION;
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
- intel_pt_fup_event(decoder);
+ intel_pt_fup_event(decoder, *no_tip);
return HOP_RETURN;
}
- intel_pt_fup_event(decoder);
+ intel_pt_fup_event(decoder, *no_tip);
decoder->state.type |= INTEL_PT_INSTRUCTION | INTEL_PT_BRANCH;
*err = intel_pt_walk_fup_tip(decoder);
if (!*err && decoder->state.to_ip)
@@ -2789,6 +2944,9 @@ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, in
case INTEL_PT_BIP:
case INTEL_PT_BEP:
case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
default:
return HOP_PROCESS;
}
@@ -2857,6 +3015,9 @@ static int intel_pt_psb_lookahead_cb(struct intel_pt_pkt_info *pkt_info)
case INTEL_PT_BIP:
case INTEL_PT_BEP:
case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
if (data->after_psbend) {
data->after_psbend -= 1;
if (!data->after_psbend)
@@ -2994,6 +3155,7 @@ next:
decoder->pos);
break;
}
+ intel_pt_sample_iflag_chg(decoder);
intel_pt_set_ip(decoder);
decoder->state.from_ip = 0;
decoder->state.to_ip = decoder->ip;
@@ -3026,7 +3188,7 @@ next:
intel_pt_set_last_ip(decoder);
if (!decoder->branch_enable || !decoder->pge) {
decoder->ip = decoder->last_ip;
- if (intel_pt_fup_event(decoder))
+ if (intel_pt_fup_event(decoder, no_tip))
return 0;
no_tip = false;
break;
@@ -3108,8 +3270,15 @@ next:
break;
case INTEL_PT_MODE_EXEC:
- decoder->exec_mode = decoder->packet.payload;
- break;
+ intel_pt_mode_exec(decoder);
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+ if (decoder->packet.type == INTEL_PT_FUP) {
+ decoder->set_fup_mode_exec = true;
+ no_tip = true;
+ }
+ goto next;
case INTEL_PT_MODE_TSX:
/* MODE_TSX need not be followed by FUP */
@@ -3223,6 +3392,35 @@ next:
}
goto next;
+ case INTEL_PT_CFE:
+ decoder->fup_cfe_pkt = decoder->packet;
+ decoder->set_fup_cfe = true;
+ if (!decoder->pge) {
+ intel_pt_fup_event(decoder, true);
+ return 0;
+ }
+ break;
+
+ case INTEL_PT_CFE_IP:
+ decoder->fup_cfe_pkt = decoder->packet;
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+ if (decoder->packet.type == INTEL_PT_FUP) {
+ decoder->set_fup_cfe_ip = true;
+ no_tip = true;
+ } else {
+ intel_pt_log_at("ERROR: Missing FUP after CFE",
+ decoder->pos);
+ }
+ goto next;
+
+ case INTEL_PT_EVD:
+ err = intel_pt_evd(decoder);
+ if (err)
+ return err;
+ break;
+
default:
return intel_pt_bug(decoder);
}
@@ -3265,6 +3463,9 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
case INTEL_PT_BIP:
case INTEL_PT_BEP:
case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
intel_pt_log("ERROR: Unexpected packet\n");
err = -ENOENT;
goto out;
@@ -3307,7 +3508,7 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
break;
case INTEL_PT_MODE_EXEC:
- decoder->exec_mode = decoder->packet.payload;
+ intel_pt_mode_exec_status(decoder);
break;
case INTEL_PT_MODE_TSX:
@@ -3426,7 +3627,7 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
break;
case INTEL_PT_MODE_EXEC:
- decoder->exec_mode = decoder->packet.payload;
+ intel_pt_mode_exec_status(decoder);
break;
case INTEL_PT_MODE_TSX:
@@ -3476,6 +3677,9 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
case INTEL_PT_BIP:
case INTEL_PT_BEP:
case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
default:
break;
}
@@ -3486,12 +3690,7 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
{
int err;
- decoder->set_fup_tx_flags = false;
- decoder->set_fup_ptw = false;
- decoder->set_fup_mwait = false;
- decoder->set_fup_pwre = false;
- decoder->set_fup_exstop = false;
- decoder->set_fup_bep = false;
+ intel_pt_clear_fup_event(decoder);
decoder->overflow = false;
if (!decoder->branch_enable) {
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index 8fd68f7a0963..efb2cb3ae0ca 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -17,6 +17,7 @@
#define INTEL_PT_IN_TX (1 << 0)
#define INTEL_PT_ABORT_TX (1 << 1)
+#define INTEL_PT_IFLAG (1 << 2)
#define INTEL_PT_ASYNC (1 << 2)
#define INTEL_PT_FUP_IP (1 << 3)
#define INTEL_PT_SAMPLE_IPC (1 << 4)
@@ -35,6 +36,8 @@ enum intel_pt_sample_type {
INTEL_PT_TRACE_END = 1 << 10,
INTEL_PT_BLK_ITEMS = 1 << 11,
INTEL_PT_PSB_EVT = 1 << 12,
+ INTEL_PT_EVT = 1 << 13,
+ INTEL_PT_IFLAG_CHG = 1 << 14,
};
enum intel_pt_period_type {
@@ -209,10 +212,24 @@ struct intel_pt_vmcs_info {
bool error_printed;
};
+/*
+ * Maximum number of event trace data in one go, assuming at most 1 per type
+ * and 6-bits of type in the EVD packet.
+ */
+#define INTEL_PT_MAX_EVDS 64
+
+/* Event trace data from EVD packet */
+struct intel_pt_evd {
+ int type;
+ uint64_t payload;
+};
+
struct intel_pt_state {
enum intel_pt_sample_type type;
bool from_nr;
bool to_nr;
+ bool from_iflag;
+ bool to_iflag;
int err;
uint64_t from_ip;
uint64_t to_ip;
@@ -234,6 +251,10 @@ struct intel_pt_state {
int insn_len;
char insn[INTEL_PT_INSN_BUF_SZ];
struct intel_pt_blk_items items;
+ int cfe_type;
+ int cfe_vector;
+ int evd_cnt;
+ struct intel_pt_evd *evd;
};
struct intel_pt_insn;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
index 4bd154848cad..18f97f43e01a 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -64,6 +64,9 @@ static const char * const packet_name[] = {
[INTEL_PT_BIP] = "BIP",
[INTEL_PT_BEP] = "BEP",
[INTEL_PT_BEP_IP] = "BEP",
+ [INTEL_PT_CFE] = "CFE",
+ [INTEL_PT_CFE_IP] = "CFE",
+ [INTEL_PT_EVD] = "EVD",
};
const char *intel_pt_pkt_name(enum intel_pt_pkt_type type)
@@ -197,8 +200,7 @@ static int intel_pt_get_mnt(const unsigned char *buf, size_t len,
return INTEL_PT_NEED_MORE_BYTES;
packet->type = INTEL_PT_MNT;
memcpy_le64(&packet->payload, buf + 3, 8);
- return 11
-;
+ return 11;
}
static int intel_pt_get_3byte(const unsigned char *buf, size_t len,
@@ -329,6 +331,29 @@ static int intel_pt_get_bep_ip(size_t len, struct intel_pt_pkt *packet)
return 2;
}
+static int intel_pt_get_cfe(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 4)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = buf[2] & 0x80 ? INTEL_PT_CFE_IP : INTEL_PT_CFE;
+ packet->count = buf[2] & 0x1f;
+ packet->payload = buf[3];
+ return 4;
+}
+
+static int intel_pt_get_evd(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 11)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_EVD;
+ packet->count = buf[2] & 0x3f;
+ packet->payload = buf[3];
+ memcpy_le64(&packet->payload, buf + 3, 8);
+ return 11;
+}
+
static int intel_pt_get_ext(const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet)
{
@@ -375,6 +400,10 @@ static int intel_pt_get_ext(const unsigned char *buf, size_t len,
return intel_pt_get_bep(len, packet);
case 0xb3: /* BEP with IP */
return intel_pt_get_bep_ip(len, packet);
+ case 0x13: /* CFE */
+ return intel_pt_get_cfe(buf, len, packet);
+ case 0x53: /* EVD */
+ return intel_pt_get_evd(buf, len, packet);
default:
return INTEL_PT_BAD_PACKET;
}
@@ -475,6 +504,7 @@ static int intel_pt_get_mode(const unsigned char *buf, size_t len,
switch (buf[1] >> 5) {
case 0:
packet->type = INTEL_PT_MODE_EXEC;
+ packet->count = buf[1];
switch (buf[1] & 3) {
case 0:
packet->payload = 16;
@@ -624,6 +654,9 @@ void intel_pt_upd_pkt_ctx(const struct intel_pt_pkt *packet,
case INTEL_PT_MWAIT:
case INTEL_PT_BEP:
case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
*ctx = INTEL_PT_NO_CTX;
break;
case INTEL_PT_BBP:
@@ -709,7 +742,8 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
return snprintf(buf, buf_len, "%s CTC 0x%x FC 0x%x", name,
(unsigned)payload, packet->count);
case INTEL_PT_MODE_EXEC:
- return snprintf(buf, buf_len, "%s %lld", name, payload);
+ return snprintf(buf, buf_len, "%s IF:%d %lld",
+ name, !!(packet->count & 4), payload);
case INTEL_PT_MODE_TSX:
return snprintf(buf, buf_len, "%s TXAbort:%u InTX:%u",
name, (unsigned)(payload >> 1) & 1,
@@ -751,6 +785,13 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
case INTEL_PT_BIP:
return snprintf(buf, buf_len, "%s ID 0x%02x Value 0x%llx",
name, packet->count, payload);
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ return snprintf(buf, buf_len, "%s IP:%d Type 0x%02x Vector 0x%llx",
+ name, packet->type == INTEL_PT_CFE_IP, packet->count, payload);
+ case INTEL_PT_EVD:
+ return snprintf(buf, buf_len, "%s Type 0x%02x Payload 0x%llx",
+ name, packet->count, payload);
default:
break;
}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
index 996090cb84f6..496ba4be875c 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
@@ -56,6 +56,9 @@ enum intel_pt_pkt_type {
INTEL_PT_BIP,
INTEL_PT_BEP,
INTEL_PT_BEP_IP,
+ INTEL_PT_CFE,
+ INTEL_PT_CFE_IP,
+ INTEL_PT_EVD,
};
struct intel_pt_pkt {
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index e8613cbda331..ec43d364d0de 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -46,6 +46,12 @@
#define MAX_TIMESTAMP (~0ULL)
+#define INTEL_PT_CFG_PASS_THRU BIT_ULL(0)
+#define INTEL_PT_CFG_PWR_EVT_EN BIT_ULL(4)
+#define INTEL_PT_CFG_BRANCH_EN BIT_ULL(13)
+#define INTEL_PT_CFG_EVT_EN BIT_ULL(31)
+#define INTEL_PT_CFG_TNT_DIS BIT_ULL(55)
+
struct range {
u64 start;
u64 end;
@@ -71,6 +77,7 @@ struct intel_pt {
bool mispred_all;
bool use_thread_stack;
bool callstack;
+ bool cap_event_trace;
unsigned int br_stack_sz;
unsigned int br_stack_sz_plus;
int have_sched_switch;
@@ -115,6 +122,12 @@ struct intel_pt {
bool sample_pebs;
struct evsel *pebs_evsel;
+ u64 evt_sample_type;
+ u64 evt_id;
+
+ u64 iflag_chg_sample_type;
+ u64 iflag_chg_id;
+
u64 tsc_bit;
u64 mtc_bit;
u64 mtc_freq_bits;
@@ -953,12 +966,26 @@ static bool intel_pt_branch_enable(struct intel_pt *pt)
evlist__for_each_entry(pt->session->evlist, evsel) {
if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
- (config & 1) && !(config & 0x2000))
+ (config & INTEL_PT_CFG_PASS_THRU) &&
+ !(config & INTEL_PT_CFG_BRANCH_EN))
return false;
}
return true;
}
+static bool intel_pt_disabled_tnt(struct intel_pt *pt)
+{
+ struct evsel *evsel;
+ u64 config;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
+ config & INTEL_PT_CFG_TNT_DIS)
+ return true;
+ }
+ return false;
+}
+
static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
{
struct evsel *evsel;
@@ -1214,6 +1241,10 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
params.first_timestamp = pt->first_timestamp;
params.max_loops = pt->max_loops;
+ /* Cannot walk code without TNT, so force 'quick' mode */
+ if (params.branch_enable && intel_pt_disabled_tnt(pt) && !params.quick)
+ params.quick = 1;
+
if (pt->filts.cnt > 0)
params.pgd_ip = intel_pt_pgd_ip;
@@ -1316,6 +1347,8 @@ static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
{
+ struct intel_pt *pt = ptq->pt;
+
ptq->insn_len = 0;
if (ptq->state->flags & INTEL_PT_ABORT_TX) {
ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
@@ -1346,6 +1379,17 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
ptq->flags |= PERF_IP_FLAG_TRACE_BEGIN;
if (ptq->state->type & INTEL_PT_TRACE_END)
ptq->flags |= PERF_IP_FLAG_TRACE_END;
+
+ if (pt->cap_event_trace) {
+ if (ptq->state->type & INTEL_PT_IFLAG_CHG) {
+ if (!ptq->state->from_iflag)
+ ptq->flags |= PERF_IP_FLAG_INTR_DISABLE;
+ if (ptq->state->from_iflag != ptq->state->to_iflag)
+ ptq->flags |= PERF_IP_FLAG_INTR_TOGGLE;
+ } else if (!ptq->state->to_iflag) {
+ ptq->flags |= PERF_IP_FLAG_INTR_DISABLE;
+ }
+ }
}
static void intel_pt_setup_time_range(struct intel_pt *pt,
@@ -2160,6 +2204,78 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
return err;
}
+static int intel_pt_synth_events_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+ struct {
+ struct perf_synth_intel_evt cfe;
+ struct perf_synth_intel_evd evd[INTEL_PT_MAX_EVDS];
+ } raw;
+ int i;
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->evt_id;
+ sample.stream_id = ptq->pt->evt_id;
+
+ raw.cfe.type = ptq->state->cfe_type;
+ raw.cfe.reserved = 0;
+ raw.cfe.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
+ raw.cfe.vector = ptq->state->cfe_vector;
+ raw.cfe.evd_cnt = ptq->state->evd_cnt;
+
+ for (i = 0; i < ptq->state->evd_cnt; i++) {
+ raw.evd[i].et = 0;
+ raw.evd[i].evd_type = ptq->state->evd[i].type;
+ raw.evd[i].payload = ptq->state->evd[i].payload;
+ }
+
+ sample.raw_size = perf_synth__raw_size(raw) +
+ ptq->state->evd_cnt * sizeof(struct perf_synth_intel_evd);
+ sample.raw_data = perf_synth__raw_data(&raw);
+
+ return intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->evt_sample_type);
+}
+
+static int intel_pt_synth_iflag_chg_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+ struct perf_synth_intel_iflag_chg raw;
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->iflag_chg_id;
+ sample.stream_id = ptq->pt->iflag_chg_id;
+
+ raw.flags = 0;
+ raw.iflag = ptq->state->to_iflag;
+
+ if (ptq->state->type & INTEL_PT_BRANCH) {
+ raw.via_branch = 1;
+ raw.branch_ip = ptq->state->to_ip;
+ } else {
+ sample.addr = 0;
+ }
+ sample.flags = ptq->flags;
+
+ sample.raw_size = perf_synth__raw_size(raw);
+ sample.raw_data = perf_synth__raw_data(&raw);
+
+ return intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->iflag_chg_sample_type);
+}
+
static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
pid_t pid, pid_t tid, u64 ip, u64 timestamp)
{
@@ -2266,6 +2382,19 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
return err;
}
+ if (pt->synth_opts.intr_events) {
+ if (state->type & INTEL_PT_EVT) {
+ err = intel_pt_synth_events_sample(ptq);
+ if (err)
+ return err;
+ }
+ if (state->type & INTEL_PT_IFLAG_CHG) {
+ err = intel_pt_synth_iflag_chg_sample(ptq);
+ if (err)
+ return err;
+ }
+ }
+
if (pt->sample_pwr_events) {
if (state->type & INTEL_PT_PSB_EVT) {
err = intel_pt_synth_psb_sample(ptq);
@@ -3429,7 +3558,7 @@ static int intel_pt_synth_events(struct intel_pt *pt,
id += 1;
}
- if (pt->synth_opts.pwr_events && (evsel->core.attr.config & 0x10)) {
+ if (pt->synth_opts.pwr_events && (evsel->core.attr.config & INTEL_PT_CFG_PWR_EVT_EN)) {
attr.config = PERF_SYNTH_INTEL_MWAIT;
err = intel_pt_synth_event(session, "mwait", &attr, id);
if (err)
@@ -3463,6 +3592,28 @@ static int intel_pt_synth_events(struct intel_pt *pt,
id += 1;
}
+ if (pt->synth_opts.intr_events && (evsel->core.attr.config & INTEL_PT_CFG_EVT_EN)) {
+ attr.config = PERF_SYNTH_INTEL_EVT;
+ err = intel_pt_synth_event(session, "evt", &attr, id);
+ if (err)
+ return err;
+ pt->evt_sample_type = attr.sample_type;
+ pt->evt_id = id;
+ intel_pt_set_event_name(evlist, id, "evt");
+ id += 1;
+ }
+
+ if (pt->synth_opts.intr_events && pt->cap_event_trace) {
+ attr.config = PERF_SYNTH_INTEL_IFLAG_CHG;
+ err = intel_pt_synth_event(session, "iflag", &attr, id);
+ if (err)
+ return err;
+ pt->iflag_chg_sample_type = attr.sample_type;
+ pt->iflag_chg_id = id;
+ intel_pt_set_event_name(evlist, id, "iflag");
+ id += 1;
+ }
+
return 0;
}
@@ -3790,7 +3941,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
}
info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;
- info_end = (void *)info + auxtrace_info->header.size;
+ info_end = (void *)auxtrace_info + auxtrace_info->header.size;
if (intel_pt_has(auxtrace_info, INTEL_PT_FILTER_STR_LEN)) {
size_t len;
@@ -3829,6 +3980,13 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
intel_pt_print_info_str("Filter string", pt->filter);
}
+ if ((void *)info < info_end) {
+ pt->cap_event_trace = *info++;
+ if (dump_trace)
+ fprintf(stdout, " Cap Event Trace %d\n",
+ pt->cap_event_trace);
+ }
+
pt->timeless_decoding = intel_pt_timeless_decoding(pt);
if (pt->timeless_decoding && !pt->tc.time_mult)
pt->tc.time_mult = 1;
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index 917a9c707371..a23255773c60 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -382,15 +382,15 @@ jit_inject_event(struct jit_buf_desc *jd, union perf_event *event)
static pid_t jr_entry_pid(struct jit_buf_desc *jd, union jr_entry *jr)
{
- if (jd->nsi && jd->nsi->in_pidns)
- return jd->nsi->tgid;
+ if (jd->nsi && nsinfo__in_pidns(jd->nsi))
+ return nsinfo__tgid(jd->nsi);
return jr->load.pid;
}
static pid_t jr_entry_tid(struct jit_buf_desc *jd, union jr_entry *jr)
{
- if (jd->nsi && jd->nsi->in_pidns)
- return jd->nsi->pid;
+ if (jd->nsi && nsinfo__in_pidns(jd->nsi))
+ return nsinfo__pid(jd->nsi);
return jr->load.tid;
}
@@ -779,7 +779,7 @@ jit_detect(char *mmap_name, pid_t pid, struct nsinfo *nsi)
* pid does not match mmap pid
* pid==0 in system-wide mode (synthesized)
*/
- if (pid && pid2 != nsi->nstgid)
+ if (pid && pid2 != nsinfo__nstgid(nsi))
return -1;
/*
* validate suffix
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 394550003693..b80048546451 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -89,7 +89,10 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
int err = -ENOMEM;
memset(machine, 0, sizeof(*machine));
- maps__init(&machine->kmaps, machine);
+ machine->kmaps = maps__new(machine);
+ if (machine->kmaps == NULL)
+ return -ENOMEM;
+
RB_CLEAR_NODE(&machine->rb_node);
dsos__init(&machine->dsos);
@@ -108,7 +111,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
machine->root_dir = strdup(root_dir);
if (machine->root_dir == NULL)
- return -ENOMEM;
+ goto out;
if (machine__set_mmap_name(machine))
goto out;
@@ -131,6 +134,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
out:
if (err) {
+ zfree(&machine->kmaps);
zfree(&machine->root_dir);
zfree(&machine->mmap_name);
}
@@ -220,7 +224,7 @@ void machine__exit(struct machine *machine)
return;
machine__destroy_kernel_maps(machine);
- maps__exit(&machine->kmaps);
+ maps__delete(machine->kmaps);
dsos__exit(&machine->dsos);
machine__exit_vdso(machine);
zfree(&machine->root_dir);
@@ -778,7 +782,7 @@ static int machine__process_ksymbol_register(struct machine *machine,
struct perf_sample *sample __maybe_unused)
{
struct symbol *sym;
- struct map *map = maps__find(&machine->kmaps, event->ksymbol.addr);
+ struct map *map = maps__find(machine__kernel_maps(machine), event->ksymbol.addr);
if (!map) {
struct dso *dso = dso__new(event->ksymbol.name);
@@ -801,7 +805,7 @@ static int machine__process_ksymbol_register(struct machine *machine,
map->start = event->ksymbol.addr;
map->end = map->start + event->ksymbol.len;
- maps__insert(&machine->kmaps, map);
+ maps__insert(machine__kernel_maps(machine), map);
map__put(map);
dso__set_loaded(dso);
@@ -827,12 +831,12 @@ static int machine__process_ksymbol_unregister(struct machine *machine,
struct symbol *sym;
struct map *map;
- map = maps__find(&machine->kmaps, event->ksymbol.addr);
+ map = maps__find(machine__kernel_maps(machine), event->ksymbol.addr);
if (!map)
return 0;
if (map != machine->vmlinux_map)
- maps__remove(&machine->kmaps, map);
+ maps__remove(machine__kernel_maps(machine), map);
else {
sym = dso__find_symbol(map->dso, map->map_ip(map, map->start));
if (sym)
@@ -858,7 +862,7 @@ int machine__process_ksymbol(struct machine *machine __maybe_unused,
int machine__process_text_poke(struct machine *machine, union perf_event *event,
struct perf_sample *sample __maybe_unused)
{
- struct map *map = maps__find(&machine->kmaps, event->text_poke.addr);
+ struct map *map = maps__find(machine__kernel_maps(machine), event->text_poke.addr);
u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
if (dump_trace)
@@ -914,7 +918,7 @@ static struct map *machine__addnew_module_map(struct machine *machine, u64 start
if (map == NULL)
goto out;
- maps__insert(&machine->kmaps, map);
+ maps__insert(machine__kernel_maps(machine), map);
/* Put the map here because maps__insert already got it */
map__put(map);
@@ -1100,7 +1104,7 @@ int machine__create_extra_kernel_map(struct machine *machine,
strlcpy(kmap->name, xm->name, KMAP_NAME_LEN);
- maps__insert(&machine->kmaps, map);
+ maps__insert(machine__kernel_maps(machine), map);
pr_debug2("Added extra kernel map %s %" PRIx64 "-%" PRIx64 "\n",
kmap->name, map->start, map->end);
@@ -1145,7 +1149,7 @@ static u64 find_entry_trampoline(struct dso *dso)
int machine__map_x86_64_entry_trampolines(struct machine *machine,
struct dso *kernel)
{
- struct maps *kmaps = &machine->kmaps;
+ struct maps *kmaps = machine__kernel_maps(machine);
int nr_cpus_avail, cpu;
bool found = false;
struct map *map;
@@ -1215,7 +1219,7 @@ __machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
return -1;
machine->vmlinux_map->map_ip = machine->vmlinux_map->unmap_ip = identity__map_ip;
- maps__insert(&machine->kmaps, machine->vmlinux_map);
+ maps__insert(machine__kernel_maps(machine), machine->vmlinux_map);
return 0;
}
@@ -1228,7 +1232,7 @@ void machine__destroy_kernel_maps(struct machine *machine)
return;
kmap = map__kmap(map);
- maps__remove(&machine->kmaps, map);
+ maps__remove(machine__kernel_maps(machine), map);
if (kmap && kmap->ref_reloc_sym) {
zfree((char **)&kmap->ref_reloc_sym->name);
zfree(&kmap->ref_reloc_sym);
@@ -1323,7 +1327,7 @@ int machine__load_kallsyms(struct machine *machine, const char *filename)
* kernel, with modules between them, fixup the end of all
* sections.
*/
- maps__fixup_end(&machine->kmaps);
+ maps__fixup_end(machine__kernel_maps(machine));
}
return ret;
@@ -1471,7 +1475,7 @@ static int machine__set_modules_path(struct machine *machine)
machine->root_dir, version);
free(version);
- return maps__set_modules_path_dir(&machine->kmaps, modules_path, 0);
+ return maps__set_modules_path_dir(machine__kernel_maps(machine), modules_path, 0);
}
int __weak arch__fix_module_text_start(u64 *start __maybe_unused,
u64 *size __maybe_unused,
@@ -1544,11 +1548,11 @@ static void machine__update_kernel_mmap(struct machine *machine,
struct map *map = machine__kernel_map(machine);
map__get(map);
- maps__remove(&machine->kmaps, map);
+ maps__remove(machine__kernel_maps(machine), map);
machine__set_kernel_mmap(machine, start, end);
- maps__insert(&machine->kmaps, map);
+ maps__insert(machine__kernel_maps(machine), map);
map__put(map);
}
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index c5a45dc8df4c..0023165422aa 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -51,7 +51,7 @@ struct machine {
struct vdso_info *vdso_info;
struct perf_env *env;
struct dsos dsos;
- struct maps kmaps;
+ struct maps *kmaps;
struct map *vmlinux_map;
u64 kernel_start;
pid_t *current_tid;
@@ -83,7 +83,7 @@ struct map *machine__kernel_map(struct machine *machine)
static inline
struct maps *machine__kernel_maps(struct machine *machine)
{
- return &machine->kmaps;
+ return machine->kmaps;
}
int machine__get_kernel_start(struct machine *machine);
@@ -223,7 +223,7 @@ static inline
struct symbol *machine__find_kernel_symbol(struct machine *machine, u64 addr,
struct map **mapp)
{
- return maps__find_symbol(&machine->kmaps, addr, mapp);
+ return maps__find_symbol(machine->kmaps, addr, mapp);
}
static inline
@@ -231,7 +231,7 @@ struct symbol *machine__find_kernel_symbol_by_name(struct machine *machine,
const char *name,
struct map **mapp)
{
- return maps__find_symbol_by_name(&machine->kmaps, name, mapp);
+ return maps__find_symbol_by_name(machine->kmaps, name, mapp);
}
int arch__fix_module_text_start(u64 *start, u64 *size, const char *name);
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 8af693d9678c..e0aa4a254583 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -1,31 +1,20 @@
// SPDX-License-Identifier: GPL-2.0
-#include "symbol.h"
-#include <assert.h>
-#include <errno.h>
#include <inttypes.h>
#include <limits.h>
+#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <stdio.h>
-#include <unistd.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
+#include "debug.h"
#include "dso.h"
#include "map.h"
-#include "map_symbol.h"
+#include "namespaces.h"
+#include "srcline.h"
+#include "symbol.h"
#include "thread.h"
#include "vdso.h"
-#include "build-id.h"
-#include "debug.h"
-#include "machine.h"
-#include <linux/string.h>
-#include <linux/zalloc.h>
-#include "srcline.h"
-#include "namespaces.h"
-#include "unwind.h"
-#include "srccode.h"
-#include "ui/ui.h"
-
-static void __maps__insert(struct maps *maps, struct map *map);
static inline int is_android_lib(const char *filename)
{
@@ -138,7 +127,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
if (map != NULL) {
char newfilename[PATH_MAX];
- struct dso *dso;
+ struct dso *dso, *header_bid_dso;
int anon, no_dso, vdso, android;
android = is_android_lib(filename);
@@ -151,7 +140,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
if ((anon || no_dso) && nsi && (prot & PROT_EXEC)) {
snprintf(newfilename, sizeof(newfilename),
- "/tmp/perf-%d.map", nsi->pid);
+ "/tmp/perf-%d.map", nsinfo__pid(nsi));
filename = newfilename;
}
@@ -168,7 +157,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
nnsi = nsinfo__copy(nsi);
if (nnsi) {
nsinfo__put(nsi);
- nnsi->need_setns = false;
+ nsinfo__clear_need_setns(nnsi);
nsi = nnsi;
}
pgoff = 0;
@@ -194,9 +183,23 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
}
dso->nsinfo = nsi;
- if (build_id__is_defined(bid))
+ if (build_id__is_defined(bid)) {
dso__set_build_id(dso, bid);
-
+ } else {
+ /*
+ * If the mmap event had no build ID, search for an existing dso from the
+ * build ID header by name. Otherwise only the dso loaded at the time of
+ * reading the header will have the build ID set and all future mmaps will
+ * have it missing.
+ */
+ down_read(&machine->dsos.lock);
+ header_bid_dso = __dsos__find(&machine->dsos, filename, false);
+ up_read(&machine->dsos.lock);
+ if (header_bid_dso && header_bid_dso->header_build_id) {
+ dso__set_build_id(dso, &header_bid_dso->bid);
+ dso->header_build_id = 1;
+ }
+ }
dso__put(dso);
}
return map;
@@ -524,403 +527,13 @@ u64 map__objdump_2mem(struct map *map, u64 ip)
return ip + map->reloc;
}
-void maps__init(struct maps *maps, struct machine *machine)
-{
- maps->entries = RB_ROOT;
- init_rwsem(&maps->lock);
- maps->machine = machine;
- maps->last_search_by_name = NULL;
- maps->nr_maps = 0;
- maps->maps_by_name = NULL;
- refcount_set(&maps->refcnt, 1);
-}
-
-static void __maps__free_maps_by_name(struct maps *maps)
-{
- /*
- * Free everything to try to do it from the rbtree in the next search
- */
- zfree(&maps->maps_by_name);
- maps->nr_maps_allocated = 0;
-}
-
-void maps__insert(struct maps *maps, struct map *map)
-{
- down_write(&maps->lock);
- __maps__insert(maps, map);
- ++maps->nr_maps;
-
- if (map->dso && map->dso->kernel) {
- struct kmap *kmap = map__kmap(map);
-
- if (kmap)
- kmap->kmaps = maps;
- else
- pr_err("Internal error: kernel dso with non kernel map\n");
- }
-
-
- /*
- * If we already performed some search by name, then we need to add the just
- * inserted map and resort.
- */
- if (maps->maps_by_name) {
- if (maps->nr_maps > maps->nr_maps_allocated) {
- int nr_allocate = maps->nr_maps * 2;
- struct map **maps_by_name = realloc(maps->maps_by_name, nr_allocate * sizeof(map));
-
- if (maps_by_name == NULL) {
- __maps__free_maps_by_name(maps);
- up_write(&maps->lock);
- return;
- }
-
- maps->maps_by_name = maps_by_name;
- maps->nr_maps_allocated = nr_allocate;
- }
- maps->maps_by_name[maps->nr_maps - 1] = map;
- __maps__sort_by_name(maps);
- }
- up_write(&maps->lock);
-}
-
-static void __maps__remove(struct maps *maps, struct map *map)
-{
- rb_erase_init(&map->rb_node, &maps->entries);
- map__put(map);
-}
-
-void maps__remove(struct maps *maps, struct map *map)
-{
- down_write(&maps->lock);
- if (maps->last_search_by_name == map)
- maps->last_search_by_name = NULL;
-
- __maps__remove(maps, map);
- --maps->nr_maps;
- if (maps->maps_by_name)
- __maps__free_maps_by_name(maps);
- up_write(&maps->lock);
-}
-
-static void __maps__purge(struct maps *maps)
-{
- struct map *pos, *next;
-
- maps__for_each_entry_safe(maps, pos, next) {
- rb_erase_init(&pos->rb_node, &maps->entries);
- map__put(pos);
- }
-}
-
-void maps__exit(struct maps *maps)
-{
- down_write(&maps->lock);
- __maps__purge(maps);
- up_write(&maps->lock);
-}
-
-bool maps__empty(struct maps *maps)
-{
- return !maps__first(maps);
-}
-
-struct maps *maps__new(struct machine *machine)
-{
- struct maps *maps = zalloc(sizeof(*maps));
-
- if (maps != NULL)
- maps__init(maps, machine);
-
- return maps;
-}
-
-void maps__delete(struct maps *maps)
-{
- maps__exit(maps);
- unwind__finish_access(maps);
- free(maps);
-}
-
-void maps__put(struct maps *maps)
-{
- if (maps && refcount_dec_and_test(&maps->refcnt))
- maps__delete(maps);
-}
-
-struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp)
-{
- struct map *map = maps__find(maps, addr);
-
- /* Ensure map is loaded before using map->map_ip */
- if (map != NULL && map__load(map) >= 0) {
- if (mapp != NULL)
- *mapp = map;
- return map__find_symbol(map, map->map_ip(map, addr));
- }
-
- return NULL;
-}
-
-static bool map__contains_symbol(struct map *map, struct symbol *sym)
+bool map__contains_symbol(const struct map *map, const struct symbol *sym)
{
u64 ip = map->unmap_ip(map, sym->start);
return ip >= map->start && ip < map->end;
}
-struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp)
-{
- struct symbol *sym;
- struct map *pos;
-
- down_read(&maps->lock);
-
- maps__for_each_entry(maps, pos) {
- sym = map__find_symbol_by_name(pos, name);
-
- if (sym == NULL)
- continue;
- if (!map__contains_symbol(pos, sym)) {
- sym = NULL;
- continue;
- }
- if (mapp != NULL)
- *mapp = pos;
- goto out;
- }
-
- sym = NULL;
-out:
- up_read(&maps->lock);
- return sym;
-}
-
-int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams)
-{
- if (ams->addr < ams->ms.map->start || ams->addr >= ams->ms.map->end) {
- if (maps == NULL)
- return -1;
- ams->ms.map = maps__find(maps, ams->addr);
- if (ams->ms.map == NULL)
- return -1;
- }
-
- ams->al_addr = ams->ms.map->map_ip(ams->ms.map, ams->addr);
- ams->ms.sym = map__find_symbol(ams->ms.map, ams->al_addr);
-
- return ams->ms.sym ? 0 : -1;
-}
-
-size_t maps__fprintf(struct maps *maps, FILE *fp)
-{
- size_t printed = 0;
- struct map *pos;
-
- down_read(&maps->lock);
-
- maps__for_each_entry(maps, pos) {
- printed += fprintf(fp, "Map:");
- printed += map__fprintf(pos, fp);
- if (verbose > 2) {
- printed += dso__fprintf(pos->dso, fp);
- printed += fprintf(fp, "--\n");
- }
- }
-
- up_read(&maps->lock);
-
- return printed;
-}
-
-int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp)
-{
- struct rb_root *root;
- struct rb_node *next, *first;
- int err = 0;
-
- down_write(&maps->lock);
-
- root = &maps->entries;
-
- /*
- * Find first map where end > map->start.
- * Same as find_vma() in kernel.
- */
- next = root->rb_node;
- first = NULL;
- while (next) {
- struct map *pos = rb_entry(next, struct map, rb_node);
-
- if (pos->end > map->start) {
- first = next;
- if (pos->start <= map->start)
- break;
- next = next->rb_left;
- } else
- next = next->rb_right;
- }
-
- next = first;
- while (next) {
- struct map *pos = rb_entry(next, struct map, rb_node);
- next = rb_next(&pos->rb_node);
-
- /*
- * Stop if current map starts after map->end.
- * Maps are ordered by start: next will not overlap for sure.
- */
- if (pos->start >= map->end)
- break;
-
- if (verbose >= 2) {
-
- if (use_browser) {
- pr_debug("overlapping maps in %s (disable tui for more info)\n",
- map->dso->name);
- } else {
- fputs("overlapping maps:\n", fp);
- map__fprintf(map, fp);
- map__fprintf(pos, fp);
- }
- }
-
- rb_erase_init(&pos->rb_node, root);
- /*
- * Now check if we need to create new maps for areas not
- * overlapped by the new map:
- */
- if (map->start > pos->start) {
- struct map *before = map__clone(pos);
-
- if (before == NULL) {
- err = -ENOMEM;
- goto put_map;
- }
-
- before->end = map->start;
- __maps__insert(maps, before);
- if (verbose >= 2 && !use_browser)
- map__fprintf(before, fp);
- map__put(before);
- }
-
- if (map->end < pos->end) {
- struct map *after = map__clone(pos);
-
- if (after == NULL) {
- err = -ENOMEM;
- goto put_map;
- }
-
- after->start = map->end;
- after->pgoff += map->end - pos->start;
- assert(pos->map_ip(pos, map->end) == after->map_ip(after, map->end));
- __maps__insert(maps, after);
- if (verbose >= 2 && !use_browser)
- map__fprintf(after, fp);
- map__put(after);
- }
-put_map:
- map__put(pos);
-
- if (err)
- goto out;
- }
-
- err = 0;
-out:
- up_write(&maps->lock);
- return err;
-}
-
-/*
- * XXX This should not really _copy_ te maps, but refcount them.
- */
-int maps__clone(struct thread *thread, struct maps *parent)
-{
- struct maps *maps = thread->maps;
- int err;
- struct map *map;
-
- down_read(&parent->lock);
-
- maps__for_each_entry(parent, map) {
- struct map *new = map__clone(map);
-
- if (new == NULL) {
- err = -ENOMEM;
- goto out_unlock;
- }
-
- err = unwind__prepare_access(maps, new, NULL);
- if (err)
- goto out_unlock;
-
- maps__insert(maps, new);
- map__put(new);
- }
-
- err = 0;
-out_unlock:
- up_read(&parent->lock);
- return err;
-}
-
-static void __maps__insert(struct maps *maps, struct map *map)
-{
- struct rb_node **p = &maps->entries.rb_node;
- struct rb_node *parent = NULL;
- const u64 ip = map->start;
- struct map *m;
-
- while (*p != NULL) {
- parent = *p;
- m = rb_entry(parent, struct map, rb_node);
- if (ip < m->start)
- p = &(*p)->rb_left;
- else
- p = &(*p)->rb_right;
- }
-
- rb_link_node(&map->rb_node, parent, p);
- rb_insert_color(&map->rb_node, &maps->entries);
- map__get(map);
-}
-
-struct map *maps__find(struct maps *maps, u64 ip)
-{
- struct rb_node *p;
- struct map *m;
-
- down_read(&maps->lock);
-
- p = maps->entries.rb_node;
- while (p != NULL) {
- m = rb_entry(p, struct map, rb_node);
- if (ip < m->start)
- p = p->rb_left;
- else if (ip >= m->end)
- p = p->rb_right;
- else
- goto out;
- }
-
- m = NULL;
-out:
- up_read(&maps->lock);
- return m;
-}
-
-struct map *maps__first(struct maps *maps)
-{
- struct rb_node *first = rb_first(&maps->entries);
-
- if (first)
- return rb_entry(first, struct map, rb_node);
- return NULL;
-}
-
static struct map *__map__next(struct map *map)
{
struct rb_node *next = rb_next(&map->rb_node);
@@ -961,3 +574,18 @@ struct maps *map__kmaps(struct map *map)
}
return kmap->kmaps;
}
+
+u64 map__map_ip(const struct map *map, u64 ip)
+{
+ return ip - map->start + map->pgoff;
+}
+
+u64 map__unmap_ip(const struct map *map, u64 ip)
+{
+ return ip + map->start - map->pgoff;
+}
+
+u64 identity__map_ip(const struct map *map __maybe_unused, u64 ip)
+{
+ return ip;
+}
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index d32f5b28c1fb..3dcfe06db6b3 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -29,9 +29,9 @@ struct map {
u64 reloc;
/* ip -> dso rip */
- u64 (*map_ip)(struct map *, u64);
+ u64 (*map_ip)(const struct map *, u64);
/* dso rip -> ip */
- u64 (*unmap_ip)(struct map *, u64);
+ u64 (*unmap_ip)(const struct map *, u64);
struct dso *dso;
refcount_t refcnt;
@@ -44,20 +44,12 @@ struct kmap *__map__kmap(struct map *map);
struct kmap *map__kmap(struct map *map);
struct maps *map__kmaps(struct map *map);
-static inline u64 map__map_ip(struct map *map, u64 ip)
-{
- return ip - map->start + map->pgoff;
-}
-
-static inline u64 map__unmap_ip(struct map *map, u64 ip)
-{
- return ip + map->start - map->pgoff;
-}
-
-static inline u64 identity__map_ip(struct map *map __maybe_unused, u64 ip)
-{
- return ip;
-}
+/* ip -> dso rip */
+u64 map__map_ip(const struct map *map, u64 ip);
+/* dso rip -> ip */
+u64 map__unmap_ip(const struct map *map, u64 ip);
+/* Returns ip */
+u64 identity__map_ip(const struct map *map __maybe_unused, u64 ip);
static inline size_t map__size(const struct map *map)
{
@@ -160,6 +152,8 @@ static inline bool __map__is_kmodule(const struct map *map)
bool map__has_symbols(const struct map *map);
+bool map__contains_symbol(const struct map *map, const struct symbol *sym);
+
#define ENTRY_TRAMPOLINE_NAME "__entry_SYSCALL_64_trampoline"
static inline bool is_entry_trampoline(const char *name)
diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c
new file mode 100644
index 000000000000..37bd5b40000d
--- /dev/null
+++ b/tools/perf/util/maps.c
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <stdlib.h>
+#include <linux/zalloc.h>
+#include "debug.h"
+#include "dso.h"
+#include "map.h"
+#include "maps.h"
+#include "thread.h"
+#include "ui/ui.h"
+#include "unwind.h"
+
+static void __maps__insert(struct maps *maps, struct map *map);
+
+static void maps__init(struct maps *maps, struct machine *machine)
+{
+ maps->entries = RB_ROOT;
+ init_rwsem(&maps->lock);
+ maps->machine = machine;
+ maps->last_search_by_name = NULL;
+ maps->nr_maps = 0;
+ maps->maps_by_name = NULL;
+ refcount_set(&maps->refcnt, 1);
+}
+
+static void __maps__free_maps_by_name(struct maps *maps)
+{
+ /*
+ * Free everything to try to do it from the rbtree in the next search
+ */
+ zfree(&maps->maps_by_name);
+ maps->nr_maps_allocated = 0;
+}
+
+void maps__insert(struct maps *maps, struct map *map)
+{
+ down_write(&maps->lock);
+ __maps__insert(maps, map);
+ ++maps->nr_maps;
+
+ if (map->dso && map->dso->kernel) {
+ struct kmap *kmap = map__kmap(map);
+
+ if (kmap)
+ kmap->kmaps = maps;
+ else
+ pr_err("Internal error: kernel dso with non kernel map\n");
+ }
+
+
+ /*
+ * If we already performed some search by name, then we need to add the just
+ * inserted map and resort.
+ */
+ if (maps->maps_by_name) {
+ if (maps->nr_maps > maps->nr_maps_allocated) {
+ int nr_allocate = maps->nr_maps * 2;
+ struct map **maps_by_name = realloc(maps->maps_by_name, nr_allocate * sizeof(map));
+
+ if (maps_by_name == NULL) {
+ __maps__free_maps_by_name(maps);
+ up_write(&maps->lock);
+ return;
+ }
+
+ maps->maps_by_name = maps_by_name;
+ maps->nr_maps_allocated = nr_allocate;
+ }
+ maps->maps_by_name[maps->nr_maps - 1] = map;
+ __maps__sort_by_name(maps);
+ }
+ up_write(&maps->lock);
+}
+
+static void __maps__remove(struct maps *maps, struct map *map)
+{
+ rb_erase_init(&map->rb_node, &maps->entries);
+ map__put(map);
+}
+
+void maps__remove(struct maps *maps, struct map *map)
+{
+ down_write(&maps->lock);
+ if (maps->last_search_by_name == map)
+ maps->last_search_by_name = NULL;
+
+ __maps__remove(maps, map);
+ --maps->nr_maps;
+ if (maps->maps_by_name)
+ __maps__free_maps_by_name(maps);
+ up_write(&maps->lock);
+}
+
+static void __maps__purge(struct maps *maps)
+{
+ struct map *pos, *next;
+
+ maps__for_each_entry_safe(maps, pos, next) {
+ rb_erase_init(&pos->rb_node, &maps->entries);
+ map__put(pos);
+ }
+}
+
+static void maps__exit(struct maps *maps)
+{
+ down_write(&maps->lock);
+ __maps__purge(maps);
+ up_write(&maps->lock);
+}
+
+bool maps__empty(struct maps *maps)
+{
+ return !maps__first(maps);
+}
+
+struct maps *maps__new(struct machine *machine)
+{
+ struct maps *maps = zalloc(sizeof(*maps));
+
+ if (maps != NULL)
+ maps__init(maps, machine);
+
+ return maps;
+}
+
+void maps__delete(struct maps *maps)
+{
+ maps__exit(maps);
+ unwind__finish_access(maps);
+ free(maps);
+}
+
+void maps__put(struct maps *maps)
+{
+ if (maps && refcount_dec_and_test(&maps->refcnt))
+ maps__delete(maps);
+}
+
+struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp)
+{
+ struct map *map = maps__find(maps, addr);
+
+ /* Ensure map is loaded before using map->map_ip */
+ if (map != NULL && map__load(map) >= 0) {
+ if (mapp != NULL)
+ *mapp = map;
+ return map__find_symbol(map, map->map_ip(map, addr));
+ }
+
+ return NULL;
+}
+
+struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp)
+{
+ struct symbol *sym;
+ struct map *pos;
+
+ down_read(&maps->lock);
+
+ maps__for_each_entry(maps, pos) {
+ sym = map__find_symbol_by_name(pos, name);
+
+ if (sym == NULL)
+ continue;
+ if (!map__contains_symbol(pos, sym)) {
+ sym = NULL;
+ continue;
+ }
+ if (mapp != NULL)
+ *mapp = pos;
+ goto out;
+ }
+
+ sym = NULL;
+out:
+ up_read(&maps->lock);
+ return sym;
+}
+
+int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams)
+{
+ if (ams->addr < ams->ms.map->start || ams->addr >= ams->ms.map->end) {
+ if (maps == NULL)
+ return -1;
+ ams->ms.map = maps__find(maps, ams->addr);
+ if (ams->ms.map == NULL)
+ return -1;
+ }
+
+ ams->al_addr = ams->ms.map->map_ip(ams->ms.map, ams->addr);
+ ams->ms.sym = map__find_symbol(ams->ms.map, ams->al_addr);
+
+ return ams->ms.sym ? 0 : -1;
+}
+
+size_t maps__fprintf(struct maps *maps, FILE *fp)
+{
+ size_t printed = 0;
+ struct map *pos;
+
+ down_read(&maps->lock);
+
+ maps__for_each_entry(maps, pos) {
+ printed += fprintf(fp, "Map:");
+ printed += map__fprintf(pos, fp);
+ if (verbose > 2) {
+ printed += dso__fprintf(pos->dso, fp);
+ printed += fprintf(fp, "--\n");
+ }
+ }
+
+ up_read(&maps->lock);
+
+ return printed;
+}
+
+int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp)
+{
+ struct rb_root *root;
+ struct rb_node *next, *first;
+ int err = 0;
+
+ down_write(&maps->lock);
+
+ root = &maps->entries;
+
+ /*
+ * Find first map where end > map->start.
+ * Same as find_vma() in kernel.
+ */
+ next = root->rb_node;
+ first = NULL;
+ while (next) {
+ struct map *pos = rb_entry(next, struct map, rb_node);
+
+ if (pos->end > map->start) {
+ first = next;
+ if (pos->start <= map->start)
+ break;
+ next = next->rb_left;
+ } else
+ next = next->rb_right;
+ }
+
+ next = first;
+ while (next) {
+ struct map *pos = rb_entry(next, struct map, rb_node);
+ next = rb_next(&pos->rb_node);
+
+ /*
+ * Stop if current map starts after map->end.
+ * Maps are ordered by start: next will not overlap for sure.
+ */
+ if (pos->start >= map->end)
+ break;
+
+ if (verbose >= 2) {
+
+ if (use_browser) {
+ pr_debug("overlapping maps in %s (disable tui for more info)\n",
+ map->dso->name);
+ } else {
+ fputs("overlapping maps:\n", fp);
+ map__fprintf(map, fp);
+ map__fprintf(pos, fp);
+ }
+ }
+
+ rb_erase_init(&pos->rb_node, root);
+ /*
+ * Now check if we need to create new maps for areas not
+ * overlapped by the new map:
+ */
+ if (map->start > pos->start) {
+ struct map *before = map__clone(pos);
+
+ if (before == NULL) {
+ err = -ENOMEM;
+ goto put_map;
+ }
+
+ before->end = map->start;
+ __maps__insert(maps, before);
+ if (verbose >= 2 && !use_browser)
+ map__fprintf(before, fp);
+ map__put(before);
+ }
+
+ if (map->end < pos->end) {
+ struct map *after = map__clone(pos);
+
+ if (after == NULL) {
+ err = -ENOMEM;
+ goto put_map;
+ }
+
+ after->start = map->end;
+ after->pgoff += map->end - pos->start;
+ assert(pos->map_ip(pos, map->end) == after->map_ip(after, map->end));
+ __maps__insert(maps, after);
+ if (verbose >= 2 && !use_browser)
+ map__fprintf(after, fp);
+ map__put(after);
+ }
+put_map:
+ map__put(pos);
+
+ if (err)
+ goto out;
+ }
+
+ err = 0;
+out:
+ up_write(&maps->lock);
+ return err;
+}
+
+/*
+ * XXX This should not really _copy_ te maps, but refcount them.
+ */
+int maps__clone(struct thread *thread, struct maps *parent)
+{
+ struct maps *maps = thread->maps;
+ int err;
+ struct map *map;
+
+ down_read(&parent->lock);
+
+ maps__for_each_entry(parent, map) {
+ struct map *new = map__clone(map);
+
+ if (new == NULL) {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ err = unwind__prepare_access(maps, new, NULL);
+ if (err)
+ goto out_unlock;
+
+ maps__insert(maps, new);
+ map__put(new);
+ }
+
+ err = 0;
+out_unlock:
+ up_read(&parent->lock);
+ return err;
+}
+
+static void __maps__insert(struct maps *maps, struct map *map)
+{
+ struct rb_node **p = &maps->entries.rb_node;
+ struct rb_node *parent = NULL;
+ const u64 ip = map->start;
+ struct map *m;
+
+ while (*p != NULL) {
+ parent = *p;
+ m = rb_entry(parent, struct map, rb_node);
+ if (ip < m->start)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ rb_link_node(&map->rb_node, parent, p);
+ rb_insert_color(&map->rb_node, &maps->entries);
+ map__get(map);
+}
+
+struct map *maps__find(struct maps *maps, u64 ip)
+{
+ struct rb_node *p;
+ struct map *m;
+
+ down_read(&maps->lock);
+
+ p = maps->entries.rb_node;
+ while (p != NULL) {
+ m = rb_entry(p, struct map, rb_node);
+ if (ip < m->start)
+ p = p->rb_left;
+ else if (ip >= m->end)
+ p = p->rb_right;
+ else
+ goto out;
+ }
+
+ m = NULL;
+out:
+ up_read(&maps->lock);
+ return m;
+}
+
+struct map *maps__first(struct maps *maps)
+{
+ struct rb_node *first = rb_first(&maps->entries);
+
+ if (first)
+ return rb_entry(first, struct map, rb_node);
+ return NULL;
+}
diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h
index 3dd000ddf925..7e729ff42749 100644
--- a/tools/perf/util/maps.h
+++ b/tools/perf/util/maps.h
@@ -60,8 +60,6 @@ static inline struct maps *maps__get(struct maps *maps)
}
void maps__put(struct maps *maps);
-void maps__init(struct maps *maps, struct machine *machine);
-void maps__exit(struct maps *maps);
int maps__clone(struct thread *thread, struct maps *parent);
size_t maps__fprintf(struct maps *maps, FILE *fp);
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 0e8ff8d1e206..50502b4a7ca4 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -230,6 +230,10 @@ void mmap__munmap(struct mmap *map)
{
bitmap_free(map->affinity_mask.bits);
+#ifndef PYTHON_PERF
+ zstd_fini(&map->zstd_data);
+#endif
+
perf_mmap__aio_munmap(map);
if (map->data != NULL) {
munmap(map->data, mmap__mmap_len(map));
@@ -292,6 +296,12 @@ int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, struct perf_cpu
map->core.flush = mp->flush;
map->comp_level = mp->comp_level;
+#ifndef PYTHON_PERF
+ if (zstd_init(&map->zstd_data, map->comp_level)) {
+ pr_debug2("failed to init mmap compressor, error %d\n", errno);
+ return -1;
+ }
+#endif
if (map->comp_level && !perf_mmap__aio_enabled(map)) {
map->data = mmap(NULL, mmap__mmap_len(map), PROT_READ|PROT_WRITE,
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index 83f6bd4d4082..cd8b0777473b 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -15,6 +15,7 @@
#endif
#include "auxtrace.h"
#include "event.h"
+#include "util/compress.h"
struct aiocb;
@@ -45,6 +46,8 @@ struct mmap {
struct mmap_cpu_mask affinity_mask;
void *data;
int comp_level;
+ struct perf_data_file *file;
+ struct zstd_data zstd_data;
};
struct mmap_params {
diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c
index 48aa3217300b..dd536220cdb9 100644
--- a/tools/perf/util/namespaces.c
+++ b/tools/perf/util/namespaces.c
@@ -76,7 +76,7 @@ static int nsinfo__get_nspid(struct nsinfo *nsi, const char *path)
if (strstr(statln, "Tgid:") != NULL) {
nsi->tgid = (pid_t)strtol(strrchr(statln, '\t'),
NULL, 10);
- nsi->nstgid = nsi->tgid;
+ nsi->nstgid = nsinfo__tgid(nsi);
}
if (strstr(statln, "NStgid:") != NULL) {
@@ -108,7 +108,7 @@ int nsinfo__init(struct nsinfo *nsi)
if (snprintf(oldns, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX)
return rv;
- if (asprintf(&newns, "/proc/%d/ns/mnt", nsi->pid) == -1)
+ if (asprintf(&newns, "/proc/%d/ns/mnt", nsinfo__pid(nsi)) == -1)
return rv;
if (stat(oldns, &old_stat) < 0)
@@ -129,7 +129,7 @@ int nsinfo__init(struct nsinfo *nsi)
/* If we're dealing with a process that is in a different PID namespace,
* attempt to work out the innermost tgid for the process.
*/
- if (snprintf(spath, PATH_MAX, "/proc/%d/status", nsi->pid) >= PATH_MAX)
+ if (snprintf(spath, PATH_MAX, "/proc/%d/status", nsinfo__pid(nsi)) >= PATH_MAX)
goto out;
rv = nsinfo__get_nspid(nsi, spath);
@@ -166,7 +166,7 @@ struct nsinfo *nsinfo__new(pid_t pid)
return nsi;
}
-struct nsinfo *nsinfo__copy(struct nsinfo *nsi)
+struct nsinfo *nsinfo__copy(const struct nsinfo *nsi)
{
struct nsinfo *nnsi;
@@ -175,11 +175,11 @@ struct nsinfo *nsinfo__copy(struct nsinfo *nsi)
nnsi = calloc(1, sizeof(*nnsi));
if (nnsi != NULL) {
- nnsi->pid = nsi->pid;
- nnsi->tgid = nsi->tgid;
- nnsi->nstgid = nsi->nstgid;
- nnsi->need_setns = nsi->need_setns;
- nnsi->in_pidns = nsi->in_pidns;
+ nnsi->pid = nsinfo__pid(nsi);
+ nnsi->tgid = nsinfo__tgid(nsi);
+ nnsi->nstgid = nsinfo__nstgid(nsi);
+ nnsi->need_setns = nsinfo__need_setns(nsi);
+ nnsi->in_pidns = nsinfo__in_pidns(nsi);
if (nsi->mntns_path) {
nnsi->mntns_path = strdup(nsi->mntns_path);
if (!nnsi->mntns_path) {
@@ -193,7 +193,7 @@ struct nsinfo *nsinfo__copy(struct nsinfo *nsi)
return nnsi;
}
-void nsinfo__delete(struct nsinfo *nsi)
+static void nsinfo__delete(struct nsinfo *nsi)
{
zfree(&nsi->mntns_path);
free(nsi);
@@ -212,6 +212,36 @@ void nsinfo__put(struct nsinfo *nsi)
nsinfo__delete(nsi);
}
+bool nsinfo__need_setns(const struct nsinfo *nsi)
+{
+ return nsi->need_setns;
+}
+
+void nsinfo__clear_need_setns(struct nsinfo *nsi)
+{
+ nsi->need_setns = false;
+}
+
+pid_t nsinfo__tgid(const struct nsinfo *nsi)
+{
+ return nsi->tgid;
+}
+
+pid_t nsinfo__nstgid(const struct nsinfo *nsi)
+{
+ return nsi->nstgid;
+}
+
+pid_t nsinfo__pid(const struct nsinfo *nsi)
+{
+ return nsi->pid;
+}
+
+pid_t nsinfo__in_pidns(const struct nsinfo *nsi)
+{
+ return nsi->in_pidns;
+}
+
void nsinfo__mountns_enter(struct nsinfo *nsi,
struct nscookie *nc)
{
diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h
index 9ceea9643507..567829262c42 100644
--- a/tools/perf/util/namespaces.h
+++ b/tools/perf/util/namespaces.h
@@ -47,12 +47,18 @@ struct nscookie {
int nsinfo__init(struct nsinfo *nsi);
struct nsinfo *nsinfo__new(pid_t pid);
-struct nsinfo *nsinfo__copy(struct nsinfo *nsi);
-void nsinfo__delete(struct nsinfo *nsi);
+struct nsinfo *nsinfo__copy(const struct nsinfo *nsi);
struct nsinfo *nsinfo__get(struct nsinfo *nsi);
void nsinfo__put(struct nsinfo *nsi);
+bool nsinfo__need_setns(const struct nsinfo *nsi);
+void nsinfo__clear_need_setns(struct nsinfo *nsi);
+pid_t nsinfo__tgid(const struct nsinfo *nsi);
+pid_t nsinfo__nstgid(const struct nsinfo *nsi);
+pid_t nsinfo__pid(const struct nsinfo *nsi);
+pid_t nsinfo__in_pidns(const struct nsinfo *nsi);
+
void nsinfo__mountns_enter(struct nsinfo *nsi, struct nscookie *nc);
void nsinfo__mountns_exit(struct nscookie *nc);
diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c
index 48c8f609441b..b887dfeea673 100644
--- a/tools/perf/util/ordered-events.c
+++ b/tools/perf/util/ordered-events.c
@@ -192,7 +192,7 @@ void ordered_events__delete(struct ordered_events *oe, struct ordered_event *eve
}
int ordered_events__queue(struct ordered_events *oe, union perf_event *event,
- u64 timestamp, u64 file_offset)
+ u64 timestamp, u64 file_offset, const char *file_path)
{
struct ordered_event *oevent;
@@ -217,6 +217,7 @@ int ordered_events__queue(struct ordered_events *oe, union perf_event *event,
return -ENOMEM;
oevent->file_offset = file_offset;
+ oevent->file_path = file_path;
return 0;
}
diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h
index 75345946c4b9..0b05c3c0aeaa 100644
--- a/tools/perf/util/ordered-events.h
+++ b/tools/perf/util/ordered-events.h
@@ -9,6 +9,7 @@ struct perf_sample;
struct ordered_event {
u64 timestamp;
u64 file_offset;
+ const char *file_path;
union perf_event *event;
struct list_head list;
};
@@ -53,7 +54,7 @@ struct ordered_events {
};
int ordered_events__queue(struct ordered_events *oe, union perf_event *event,
- u64 timestamp, u64 file_offset);
+ u64 timestamp, u64 file_offset, const char *file_path);
void ordered_events__delete(struct ordered_events *oe, struct ordered_event *event);
int ordered_events__flush(struct ordered_events *oe, enum oe_flush how);
int ordered_events__flush_time(struct ordered_events *oe, u64 timestamp);
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 8dfbba15aeb8..9a1c7e63e663 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -1998,7 +1998,8 @@ int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus,
{
struct perf_cpu_map *pmu_cpus = pmu->cpus;
struct perf_cpu_map *matched_cpus, *unmatched_cpus;
- int matched_nr = 0, unmatched_nr = 0;
+ struct perf_cpu cpu;
+ int i, matched_nr = 0, unmatched_nr = 0;
matched_cpus = perf_cpu_map__default_new();
if (!matched_cpus)
@@ -2010,14 +2011,11 @@ int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus,
return -1;
}
- for (int i = 0; i < cpus->nr; i++) {
- int cpu;
-
- cpu = perf_cpu_map__idx(pmu_cpus, cpus->map[i]);
- if (cpu == -1)
- unmatched_cpus->map[unmatched_nr++] = cpus->map[i];
+ perf_cpu_map__for_each_cpu(cpu, i, cpus) {
+ if (!perf_cpu_map__has(pmu_cpus, cpu))
+ unmatched_cpus->map[unmatched_nr++] = cpu;
else
- matched_cpus->map[matched_nr++] = cpus->map[i];
+ matched_cpus->map[matched_nr++] = cpu;
}
unmatched_cpus->nr = unmatched_nr;
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index a834918a0a0d..062b5cbe67af 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -332,7 +332,7 @@ static int kernel_get_module_dso(const char *module, struct dso **pdso)
char module_name[128];
snprintf(module_name, sizeof(module_name), "[%s]", module);
- map = maps__find_by_name(&host_machine->kmaps, module_name);
+ map = maps__find_by_name(machine__kernel_maps(host_machine), module_name);
if (map) {
dso = map->dso;
goto found;
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 52d8995cfd73..5be5fa2391de 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -58,10 +58,21 @@ int parse_callchain_record(const char *arg __maybe_unused,
}
/*
- * Add this one here not to drag util/env.c
+ * Add these not to drag util/env.c
*/
struct perf_env perf_env;
+const char *perf_env__cpuid(struct perf_env *env __maybe_unused)
+{
+ return NULL;
+}
+
+// This one is a bit easier, wouldn't drag too much, but leave it as a stub we need it here
+const char *perf_env__arch(struct perf_env *env __maybe_unused)
+{
+ return NULL;
+}
+
/*
* Add this one here not to drag util/stat-shadow.c
*/
diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
index ef6c2715fdd9..be9a957501f4 100644
--- a/tools/perf/util/record.h
+++ b/tools/perf/util/record.h
@@ -78,6 +78,8 @@ struct record_opts {
int ctl_fd_ack;
bool ctl_fd_close;
int synth;
+ int threads_spec;
+ const char *threads_user_spec;
};
extern const char * const *record_usage;
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index e752e1f4a5f0..413f2d19c13f 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -1216,7 +1216,7 @@ static void python_export_sample_table(struct db_export *dbe,
struct tables *tables = container_of(dbe, struct tables, dbe);
PyObject *t;
- t = tuple_new(24);
+ t = tuple_new(25);
tuple_set_d64(t, 0, es->db_id);
tuple_set_d64(t, 1, es->evsel->db_id);
@@ -1242,6 +1242,7 @@ static void python_export_sample_table(struct db_export *dbe,
tuple_set_d64(t, 21, es->call_path_id);
tuple_set_d64(t, 22, es->sample->insn_cnt);
tuple_set_d64(t, 23, es->sample->cyc_cnt);
+ tuple_set_s32(t, 24, es->sample->flags);
call_object(tables->sample_handler, t, "sample_table");
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 498b05708db5..3b8dfe603e50 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -39,7 +39,8 @@
#ifdef HAVE_ZSTD_SUPPORT
static int perf_session__process_compressed_event(struct perf_session *session,
- union perf_event *event, u64 file_offset)
+ union perf_event *event, u64 file_offset,
+ const char *file_path)
{
void *src;
size_t decomp_size, src_size;
@@ -61,6 +62,7 @@ static int perf_session__process_compressed_event(struct perf_session *session,
}
decomp->file_pos = file_offset;
+ decomp->file_path = file_path;
decomp->mmap_len = mmap_len;
decomp->head = 0;
@@ -100,7 +102,8 @@ static int perf_session__process_compressed_event(struct perf_session *session,
static int perf_session__deliver_event(struct perf_session *session,
union perf_event *event,
struct perf_tool *tool,
- u64 file_offset);
+ u64 file_offset,
+ const char *file_path);
static int perf_session__open(struct perf_session *session, int repipe_fd)
{
@@ -182,7 +185,8 @@ static int ordered_events__deliver_event(struct ordered_events *oe,
ordered_events);
return perf_session__deliver_event(session, event->event,
- session->tool, event->file_offset);
+ session->tool, event->file_offset,
+ event->file_path);
}
struct perf_session *__perf_session__new(struct perf_data *data,
@@ -471,7 +475,8 @@ static int process_event_time_conv_stub(struct perf_session *perf_session __mayb
static int perf_session__process_compressed_event_stub(struct perf_session *session __maybe_unused,
union perf_event *event __maybe_unused,
- u64 file_offset __maybe_unused)
+ u64 file_offset __maybe_unused,
+ const char *file_path __maybe_unused)
{
dump_printf(": unhandled!\n");
return 0;
@@ -1072,9 +1077,9 @@ static int process_finished_round(struct perf_tool *tool __maybe_unused,
}
int perf_session__queue_event(struct perf_session *s, union perf_event *event,
- u64 timestamp, u64 file_offset)
+ u64 timestamp, u64 file_offset, const char *file_path)
{
- return ordered_events__queue(&s->ordered_events, event, timestamp, file_offset);
+ return ordered_events__queue(&s->ordered_events, event, timestamp, file_offset, file_path);
}
static void callchain__lbr_callstack_printf(struct perf_sample *sample)
@@ -1154,14 +1159,15 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
struct branch_entry *e = &entries[i];
if (!callstack) {
- printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n",
+ printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x %s\n",
i, e->from, e->to,
(unsigned short)e->flags.cycles,
e->flags.mispred ? "M" : " ",
e->flags.predicted ? "P" : " ",
e->flags.abort ? "A" : " ",
e->flags.in_tx ? "T" : " ",
- (unsigned)e->flags.reserved);
+ (unsigned)e->flags.reserved,
+ e->flags.type ? branch_type_name(e->flags.type) : "");
} else {
printf("..... %2"PRIu64": %016" PRIx64 "\n",
i, i > 0 ? e->from : e->to);
@@ -1277,13 +1283,14 @@ static void sample_read__printf(struct perf_sample *sample, u64 read_format)
}
static void dump_event(struct evlist *evlist, union perf_event *event,
- u64 file_offset, struct perf_sample *sample)
+ u64 file_offset, struct perf_sample *sample,
+ const char *file_path)
{
if (!dump_trace)
return;
- printf("\n%#" PRIx64 " [%#x]: event: %d\n",
- file_offset, event->header.size, event->header.type);
+ printf("\n%#" PRIx64 "@%s [%#x]: event: %d\n",
+ file_offset, file_path, event->header.size, event->header.type);
trace_event(event);
if (event->header.type == PERF_RECORD_SAMPLE && evlist->trace_event_sample_raw)
@@ -1486,12 +1493,13 @@ static int machines__deliver_event(struct machines *machines,
struct evlist *evlist,
union perf_event *event,
struct perf_sample *sample,
- struct perf_tool *tool, u64 file_offset)
+ struct perf_tool *tool, u64 file_offset,
+ const char *file_path)
{
struct evsel *evsel;
struct machine *machine;
- dump_event(evlist, event, file_offset, sample);
+ dump_event(evlist, event, file_offset, sample, file_path);
evsel = evlist__id2evsel(evlist, sample->id);
@@ -1573,7 +1581,8 @@ static int machines__deliver_event(struct machines *machines,
static int perf_session__deliver_event(struct perf_session *session,
union perf_event *event,
struct perf_tool *tool,
- u64 file_offset)
+ u64 file_offset,
+ const char *file_path)
{
struct perf_sample sample;
int ret = evlist__parse_sample(session->evlist, event, &sample);
@@ -1590,7 +1599,7 @@ static int perf_session__deliver_event(struct perf_session *session,
return 0;
ret = machines__deliver_event(&session->machines, session->evlist,
- event, &sample, tool, file_offset);
+ event, &sample, tool, file_offset, file_path);
if (dump_trace && sample.aux_sample.size)
auxtrace__dump_auxtrace_sample(session, &sample);
@@ -1600,7 +1609,8 @@ static int perf_session__deliver_event(struct perf_session *session,
static s64 perf_session__process_user_event(struct perf_session *session,
union perf_event *event,
- u64 file_offset)
+ u64 file_offset,
+ const char *file_path)
{
struct ordered_events *oe = &session->ordered_events;
struct perf_tool *tool = session->tool;
@@ -1610,7 +1620,7 @@ static s64 perf_session__process_user_event(struct perf_session *session,
if (event->header.type != PERF_RECORD_COMPRESSED ||
tool->compressed == perf_session__process_compressed_event_stub)
- dump_event(session->evlist, event, file_offset, &sample);
+ dump_event(session->evlist, event, file_offset, &sample, file_path);
/* These events are processed right away */
switch (event->header.type) {
@@ -1669,9 +1679,9 @@ static s64 perf_session__process_user_event(struct perf_session *session,
case PERF_RECORD_HEADER_FEATURE:
return tool->feature(session, event);
case PERF_RECORD_COMPRESSED:
- err = tool->compressed(session, event, file_offset);
+ err = tool->compressed(session, event, file_offset, file_path);
if (err)
- dump_event(session->evlist, event, file_offset, &sample);
+ dump_event(session->evlist, event, file_offset, &sample, file_path);
return err;
default:
return -EINVAL;
@@ -1688,9 +1698,9 @@ int perf_session__deliver_synth_event(struct perf_session *session,
events_stats__inc(&evlist->stats, event->header.type);
if (event->header.type >= PERF_RECORD_USER_TYPE_START)
- return perf_session__process_user_event(session, event, 0);
+ return perf_session__process_user_event(session, event, 0, NULL);
- return machines__deliver_event(&session->machines, evlist, event, sample, tool, 0);
+ return machines__deliver_event(&session->machines, evlist, event, sample, tool, 0, NULL);
}
static void event_swap(union perf_event *event, bool sample_id_all)
@@ -1787,7 +1797,8 @@ int perf_session__peek_events(struct perf_session *session, u64 offset,
}
static s64 perf_session__process_event(struct perf_session *session,
- union perf_event *event, u64 file_offset)
+ union perf_event *event, u64 file_offset,
+ const char *file_path)
{
struct evlist *evlist = session->evlist;
struct perf_tool *tool = session->tool;
@@ -1802,7 +1813,7 @@ static s64 perf_session__process_event(struct perf_session *session,
events_stats__inc(&evlist->stats, event->header.type);
if (event->header.type >= PERF_RECORD_USER_TYPE_START)
- return perf_session__process_user_event(session, event, file_offset);
+ return perf_session__process_user_event(session, event, file_offset, file_path);
if (tool->ordered_events) {
u64 timestamp = -1ULL;
@@ -1811,12 +1822,12 @@ static s64 perf_session__process_event(struct perf_session *session,
if (ret && ret != -1)
return ret;
- ret = perf_session__queue_event(session, event, timestamp, file_offset);
+ ret = perf_session__queue_event(session, event, timestamp, file_offset, file_path);
if (ret != -ETIME)
return ret;
}
- return perf_session__deliver_event(session, event, tool, file_offset);
+ return perf_session__deliver_event(session, event, tool, file_offset, file_path);
}
void perf_event_header__bswap(struct perf_event_header *hdr)
@@ -2043,7 +2054,7 @@ more:
}
}
- if ((skip = perf_session__process_event(session, event, head)) < 0) {
+ if ((skip = perf_session__process_event(session, event, head, "pipe")) < 0) {
pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
head, event->header.size, event->header.type);
err = -EINVAL;
@@ -2140,7 +2151,8 @@ static int __perf_session__process_decomp_events(struct perf_session *session)
size = event->header.size;
if (size < sizeof(struct perf_event_header) ||
- (skip = perf_session__process_event(session, event, decomp->file_pos)) < 0) {
+ (skip = perf_session__process_event(session, event, decomp->file_pos,
+ decomp->file_path)) < 0) {
pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
decomp->file_pos + decomp->head, event->header.size, event->header.type);
return -EINVAL;
@@ -2171,10 +2183,12 @@ struct reader;
typedef s64 (*reader_cb_t)(struct perf_session *session,
union perf_event *event,
- u64 file_offset);
+ u64 file_offset,
+ const char *file_path);
struct reader {
int fd;
+ const char *path;
u64 data_size;
u64 data_offset;
reader_cb_t process;
@@ -2186,6 +2200,8 @@ struct reader {
u64 file_pos;
u64 file_offset;
u64 head;
+ u64 size;
+ bool done;
struct zstd_data zstd_data;
struct decomp_data decomp_data;
};
@@ -2292,7 +2308,7 @@ reader__read_event(struct reader *rd, struct perf_session *session,
skip = -EINVAL;
if (size < sizeof(struct perf_event_header) ||
- (skip = rd->process(session, event, rd->file_pos)) < 0) {
+ (skip = rd->process(session, event, rd->file_pos, rd->path)) < 0) {
pr_err("%#" PRIx64 " [%#x]: failed to process type: %d [%s]\n",
rd->file_offset + rd->head, event->header.size,
event->header.type, strerror(-skip));
@@ -2303,6 +2319,7 @@ reader__read_event(struct reader *rd, struct perf_session *session,
if (skip)
size += skip;
+ rd->size += size;
rd->head += size;
rd->file_pos += size;
@@ -2359,15 +2376,17 @@ out:
static s64 process_simple(struct perf_session *session,
union perf_event *event,
- u64 file_offset)
+ u64 file_offset,
+ const char *file_path)
{
- return perf_session__process_event(session, event, file_offset);
+ return perf_session__process_event(session, event, file_offset, file_path);
}
static int __perf_session__process_events(struct perf_session *session)
{
struct reader rd = {
.fd = perf_data__fd(session->data),
+ .path = session->data->file.path,
.data_size = session->header.data_size,
.data_offset = session->header.data_offset,
.process = process_simple,
@@ -2411,6 +2430,135 @@ out_err:
return err;
}
+/*
+ * Processing 2 MB of data from each reader in sequence,
+ * because that's the way the ordered events sorting works
+ * most efficiently.
+ */
+#define READER_MAX_SIZE (2 * 1024 * 1024)
+
+/*
+ * This function reads, merge and process directory data.
+ * It assumens the version 1 of directory data, where each
+ * data file holds per-cpu data, already sorted by kernel.
+ */
+static int __perf_session__process_dir_events(struct perf_session *session)
+{
+ struct perf_data *data = session->data;
+ struct perf_tool *tool = session->tool;
+ int i, ret, readers, nr_readers;
+ struct ui_progress prog;
+ u64 total_size = perf_data__size(session->data);
+ struct reader *rd;
+
+ perf_tool__fill_defaults(tool);
+
+ ui_progress__init_size(&prog, total_size, "Sorting events...");
+
+ nr_readers = 1;
+ for (i = 0; i < data->dir.nr; i++) {
+ if (data->dir.files[i].size)
+ nr_readers++;
+ }
+
+ rd = zalloc(nr_readers * sizeof(struct reader));
+ if (!rd)
+ return -ENOMEM;
+
+ rd[0] = (struct reader) {
+ .fd = perf_data__fd(session->data),
+ .path = session->data->file.path,
+ .data_size = session->header.data_size,
+ .data_offset = session->header.data_offset,
+ .process = process_simple,
+ .in_place_update = session->data->in_place_update,
+ };
+ ret = reader__init(&rd[0], NULL);
+ if (ret)
+ goto out_err;
+ ret = reader__mmap(&rd[0], session);
+ if (ret)
+ goto out_err;
+ readers = 1;
+
+ for (i = 0; i < data->dir.nr; i++) {
+ if (!data->dir.files[i].size)
+ continue;
+ rd[readers] = (struct reader) {
+ .fd = data->dir.files[i].fd,
+ .path = data->dir.files[i].path,
+ .data_size = data->dir.files[i].size,
+ .data_offset = 0,
+ .process = process_simple,
+ .in_place_update = session->data->in_place_update,
+ };
+ ret = reader__init(&rd[readers], NULL);
+ if (ret)
+ goto out_err;
+ ret = reader__mmap(&rd[readers], session);
+ if (ret)
+ goto out_err;
+ readers++;
+ }
+
+ i = 0;
+ while (readers) {
+ if (session_done())
+ break;
+
+ if (rd[i].done) {
+ i = (i + 1) % nr_readers;
+ continue;
+ }
+ if (reader__eof(&rd[i])) {
+ rd[i].done = true;
+ readers--;
+ continue;
+ }
+
+ session->active_decomp = &rd[i].decomp_data;
+ ret = reader__read_event(&rd[i], session, &prog);
+ if (ret < 0) {
+ goto out_err;
+ } else if (ret == READER_NODATA) {
+ ret = reader__mmap(&rd[i], session);
+ if (ret)
+ goto out_err;
+ }
+
+ if (rd[i].size >= READER_MAX_SIZE) {
+ rd[i].size = 0;
+ i = (i + 1) % nr_readers;
+ }
+ }
+
+ ret = ordered_events__flush(&session->ordered_events, OE_FLUSH__FINAL);
+ if (ret)
+ goto out_err;
+
+ ret = perf_session__flush_thread_stacks(session);
+out_err:
+ ui_progress__finish();
+
+ if (!tool->no_warn)
+ perf_session__warn_about_errors(session);
+
+ /*
+ * We may switching perf.data output, make ordered_events
+ * reusable.
+ */
+ ordered_events__reinit(&session->ordered_events);
+
+ session->one_mmap = false;
+
+ session->active_decomp = &session->decomp_data;
+ for (i = 0; i < nr_readers; i++)
+ reader__release_decomp(&rd[i]);
+ zfree(&rd);
+
+ return ret;
+}
+
int perf_session__process_events(struct perf_session *session)
{
if (perf_session__register_idle_thread(session) < 0)
@@ -2419,6 +2567,9 @@ int perf_session__process_events(struct perf_session *session)
if (perf_data__is_pipe(session->data))
return __perf_session__process_pipe_events(session);
+ if (perf_data__is_dir(session->data))
+ return __perf_session__process_dir_events(session);
+
return __perf_session__process_events(session);
}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 46c854292ad6..34500a3da735 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -52,6 +52,7 @@ struct perf_session {
struct decomp {
struct decomp *next;
u64 file_pos;
+ const char *file_path;
size_t mmap_len;
u64 head;
size_t size;
@@ -87,7 +88,7 @@ int perf_session__peek_events(struct perf_session *session, u64 offset,
int perf_session__process_events(struct perf_session *session);
int perf_session__queue_event(struct perf_session *s, union perf_event *event,
- u64 timestamp, u64 file_offset);
+ u64 timestamp, u64 file_offset, const char *file_path);
void perf_tool__fill_defaults(struct perf_tool *tool);
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 2da081ef532b..6d5588e80935 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -990,6 +990,128 @@ struct sort_entry sort_sym_to = {
.se_width_idx = HISTC_SYMBOL_TO,
};
+static int _hist_entry__addr_snprintf(struct map_symbol *ms,
+ u64 ip, char level, char *bf, size_t size,
+ unsigned int width)
+{
+ struct symbol *sym = ms->sym;
+ struct map *map = ms->map;
+ size_t ret = 0, offs;
+
+ ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level);
+ if (sym && map) {
+ if (sym->type == STT_OBJECT) {
+ ret += repsep_snprintf(bf + ret, size - ret, "%s", sym->name);
+ ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx",
+ ip - map->unmap_ip(map, sym->start));
+ } else {
+ ret += repsep_snprintf(bf + ret, size - ret, "%.*s",
+ width - ret,
+ sym->name);
+ offs = ip - sym->start;
+ if (offs)
+ ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx", offs);
+ }
+ } else {
+ size_t len = BITS_PER_LONG / 4;
+ ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx",
+ len, ip);
+ }
+
+ return ret;
+}
+
+static int hist_entry__addr_from_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ if (he->branch_info) {
+ struct addr_map_symbol *from = &he->branch_info->from;
+
+ return _hist_entry__addr_snprintf(&from->ms, from->al_addr,
+ he->level, bf, size, width);
+ }
+
+ return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
+}
+
+static int hist_entry__addr_to_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ if (he->branch_info) {
+ struct addr_map_symbol *to = &he->branch_info->to;
+
+ return _hist_entry__addr_snprintf(&to->ms, to->al_addr,
+ he->level, bf, size, width);
+ }
+
+ return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
+}
+
+static int64_t
+sort__addr_from_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ struct addr_map_symbol *from_l;
+ struct addr_map_symbol *from_r;
+ int64_t ret;
+
+ if (!left->branch_info || !right->branch_info)
+ return cmp_null(left->branch_info, right->branch_info);
+
+ from_l = &left->branch_info->from;
+ from_r = &right->branch_info->from;
+
+ /*
+ * comparing symbol address alone is not enough since it's a
+ * relative address within a dso.
+ */
+ ret = _sort__dso_cmp(from_l->ms.map, from_r->ms.map);
+ if (ret != 0)
+ return ret;
+
+ return _sort__addr_cmp(from_l->addr, from_r->addr);
+}
+
+static int64_t
+sort__addr_to_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ struct addr_map_symbol *to_l;
+ struct addr_map_symbol *to_r;
+ int64_t ret;
+
+ if (!left->branch_info || !right->branch_info)
+ return cmp_null(left->branch_info, right->branch_info);
+
+ to_l = &left->branch_info->to;
+ to_r = &right->branch_info->to;
+
+ /*
+ * comparing symbol address alone is not enough since it's a
+ * relative address within a dso.
+ */
+ ret = _sort__dso_cmp(to_l->ms.map, to_r->ms.map);
+ if (ret != 0)
+ return ret;
+
+ return _sort__addr_cmp(to_l->addr, to_r->addr);
+}
+
+struct sort_entry sort_addr_from = {
+ .se_header = "Source Address",
+ .se_cmp = sort__addr_from_cmp,
+ .se_snprintf = hist_entry__addr_from_snprintf,
+ .se_filter = hist_entry__sym_from_filter, /* shared with sym_from */
+ .se_width_idx = HISTC_ADDR_FROM,
+};
+
+struct sort_entry sort_addr_to = {
+ .se_header = "Target Address",
+ .se_cmp = sort__addr_to_cmp,
+ .se_snprintf = hist_entry__addr_to_snprintf,
+ .se_filter = hist_entry__sym_to_filter, /* shared with sym_to */
+ .se_width_idx = HISTC_ADDR_TO,
+};
+
+
static int64_t
sort__mispredict_cmp(struct hist_entry *left, struct hist_entry *right)
{
@@ -1893,6 +2015,8 @@ static struct sort_dimension bstack_sort_dimensions[] = {
DIM(SORT_SRCLINE_FROM, "srcline_from", sort_srcline_from),
DIM(SORT_SRCLINE_TO, "srcline_to", sort_srcline_to),
DIM(SORT_SYM_IPC, "ipc_lbr", sort_sym_ipc),
+ DIM(SORT_ADDR_FROM, "addr_from", sort_addr_from),
+ DIM(SORT_ADDR_TO, "addr_to", sort_addr_to),
};
#undef DIM
@@ -3126,6 +3250,10 @@ static bool get_elide(int idx, FILE *output)
return __get_elide(symbol_conf.dso_from_list, "dso_from", output);
case HISTC_DSO_TO:
return __get_elide(symbol_conf.dso_to_list, "dso_to", output);
+ case HISTC_ADDR_FROM:
+ return __get_elide(symbol_conf.sym_from_list, "addr_from", output);
+ case HISTC_ADDR_TO:
+ return __get_elide(symbol_conf.sym_to_list, "addr_to", output);
default:
break;
}
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index f994261888e1..2ddc00d1c464 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -251,6 +251,8 @@ enum sort_type {
SORT_SRCLINE_FROM,
SORT_SRCLINE_TO,
SORT_SYM_IPC,
+ SORT_ADDR_FROM,
+ SORT_ADDR_TO,
/* memory mode specific sort keys */
__SORT_MEMORY_MODE,
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index dfde9eada224..dea0fc495185 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1735,8 +1735,8 @@ static int dso__find_perf_map(char *filebuf, size_t bufsz,
nsi = *nsip;
- if (nsi->need_setns) {
- snprintf(filebuf, bufsz, "/tmp/perf-%d.map", nsi->nstgid);
+ if (nsinfo__need_setns(nsi)) {
+ snprintf(filebuf, bufsz, "/tmp/perf-%d.map", nsinfo__nstgid(nsi));
nsinfo__mountns_enter(nsi, &nsc);
rc = access(filebuf, R_OK);
nsinfo__mountns_exit(&nsc);
@@ -1748,8 +1748,8 @@ static int dso__find_perf_map(char *filebuf, size_t bufsz,
if (nnsi) {
nsinfo__put(nsi);
- nnsi->need_setns = false;
- snprintf(filebuf, bufsz, "/tmp/perf-%d.map", nnsi->tgid);
+ nsinfo__clear_need_setns(nnsi);
+ snprintf(filebuf, bufsz, "/tmp/perf-%d.map", nsinfo__tgid(nnsi));
*nsip = nnsi;
rc = 0;
}
@@ -1864,6 +1864,16 @@ int dso__load(struct dso *dso, struct map *map)
nsinfo__mountns_exit(&nsc);
is_reg = is_regular_file(name);
+ if (!is_reg && errno == ENOENT && dso->nsinfo) {
+ char *new_name = filename_with_chroot(dso->nsinfo->pid,
+ name);
+ if (new_name) {
+ is_reg = is_regular_file(new_name);
+ strlcpy(name, new_name, PATH_MAX);
+ free(new_name);
+ }
+ }
+
#ifdef HAVE_LIBBFD_SUPPORT
if (is_reg)
bfdrc = dso__load_bfd_symbols(dso, name);
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index ef873f2cc38f..f2352dba1875 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -28,7 +28,8 @@ typedef int (*event_attr_op)(struct perf_tool *tool,
typedef int (*event_op2)(struct perf_session *session, union perf_event *event);
typedef s64 (*event_op3)(struct perf_session *session, union perf_event *event);
-typedef int (*event_op4)(struct perf_session *session, union perf_event *event, u64 data);
+typedef int (*event_op4)(struct perf_session *session, union perf_event *event, u64 data,
+ const char *str);
typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event,
struct ordered_events *oe);
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
index ff8391208ecd..1c2c0a838430 100644
--- a/tools/perf/util/top.h
+++ b/tools/perf/util/top.h
@@ -33,7 +33,10 @@ struct perf_top {
int print_entries, count_filter, delay_secs;
int max_stack;
bool hide_kernel_symbols, hide_user_symbols, zero;
- bool use_tui, use_stdio;
+#ifdef HAVE_SLANG_SUPPORT
+ bool use_tui;
+#endif
+ bool use_stdio;
bool vmlinux_warned;
bool dump_symtab;
bool stitch_lbr;
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 9634f0ae57be..c9c83a40647c 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -206,7 +206,7 @@ unsigned long long eval_flag(const char *flag)
if (isdigit(flag[0]))
return strtoull(flag, NULL, 0);
- for (i = 0; i < (int)(sizeof(flags)/sizeof(flags[0])); i++)
+ for (i = 0; i < (int)(ARRAY_SIZE(flags)); i++)
if (strcmp(flags[i].name, flag) == 0)
return flags[i].value;
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index fb4f6616b5fa..f8571a66d063 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -431,3 +431,34 @@ void perf_debuginfod_setup(struct perf_debuginfod *di)
pr_debug("DEBUGINFOD_URLS=%s\n", getenv("DEBUGINFOD_URLS"));
}
+
+/*
+ * Return a new filename prepended with task's root directory if it's in
+ * a chroot. Callers should free the returned string.
+ */
+char *filename_with_chroot(int pid, const char *filename)
+{
+ char buf[PATH_MAX];
+ char proc_root[32];
+ char *new_name = NULL;
+ int ret;
+
+ scnprintf(proc_root, sizeof(proc_root), "/proc/%d/root", pid);
+ ret = readlink(proc_root, buf, sizeof(buf) - 1);
+ if (ret <= 0)
+ return NULL;
+
+ /* readlink(2) does not append a null byte to buf */
+ buf[ret] = '\0';
+
+ if (!strcmp(buf, "/"))
+ return NULL;
+
+ if (strstr(buf, "(deleted)"))
+ return NULL;
+
+ if (asprintf(&new_name, "%s/%s", buf, filename) < 0)
+ return NULL;
+
+ return new_name;
+}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 7b625cbd2dd8..0f78f1e7782d 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -77,4 +77,6 @@ struct perf_debuginfod {
bool set;
};
void perf_debuginfod_setup(struct perf_debuginfod *di);
+
+char *filename_with_chroot(int pid, const char *filename);
#endif /* GIT_COMPAT_UTIL_H */