Diffstat (limited to 'tools')
 tools/lib/api/fs/fs.c                              |  17
 tools/lib/api/fs/fs.h                              |  12
 tools/lib/traceevent/parse-filter.c                |  29
 tools/perf/Documentation/Makefile                  |   4
 tools/perf/Documentation/itrace.txt                |   1
 tools/perf/Documentation/perf-bench.txt            |   8
 tools/perf/Documentation/perf-c2c.txt              |  11
 tools/perf/Documentation/perf-list.txt             |   8
 tools/perf/Documentation/perf-report.txt           |  11
 tools/perf/Documentation/perf-script.txt           |  11
 tools/perf/Documentation/perf-top.txt              |   9
 tools/perf/Documentation/perf.data-file-format.txt |  16
 tools/perf/bench/Build                             |   2
 tools/perf/bench/bench.h                           |   2
 tools/perf/bench/synthesize.c                      | 101
 tools/perf/builtin-bench.c                         |   6
 tools/perf/builtin-c2c.c                           |  12
 tools/perf/builtin-ftrace.c                        |   5
 tools/perf/builtin-report.c                        |  15
 tools/perf/builtin-script.c                        | 318
 tools/perf/builtin-stat.c                          |   5
 tools/perf/builtin-top.c                           |  11
 tools/perf/design.txt                              |   3
 tools/perf/scripts/python/bin/flamegraph-record    |   2
 tools/perf/scripts/python/bin/flamegraph-report    |   3
 tools/perf/scripts/python/flamegraph.py            | 124
 tools/perf/tests/expr.c                            |   4
 tools/perf/tests/parse-events.c                    |  17
 tools/perf/util/annotate.c                         |  20
 tools/perf/util/arm-spe.c                          |   9
 tools/perf/util/auxtrace.c                         |  94
 tools/perf/util/auxtrace.h                         |  14
 tools/perf/util/bpf-event.c                        |  93
 tools/perf/util/branch.h                           |  19
 tools/perf/util/callchain.h                        |   8
 tools/perf/util/cap.h                              |   4
 tools/perf/util/cs-etm.c                           |  11
 tools/perf/util/dso.c                              |   1
 tools/perf/util/dso.h                              |   1
 tools/perf/util/env.h                              |   3
 tools/perf/util/evlist.c                           |   6
 tools/perf/util/evsel.c                            |  35
 tools/perf/util/evsel.h                            |  18
 tools/perf/util/expr.c                             |  16
 tools/perf/util/expr.h                             |  16
 tools/perf/util/expr.l                             |  10
 tools/perf/util/expr.y                             |   6
 tools/perf/util/header.c                           | 108
 tools/perf/util/header.h                           |   1
 tools/perf/util/hist.c                             |  23
 tools/perf/util/intel-bts.c                        |  10
 tools/perf/util/intel-pt.c                         |  95
 tools/perf/util/machine.c                          | 434
 tools/perf/util/metricgroup.c                      |  60
 tools/perf/util/parse-events.l                     |   1
 tools/perf/util/parse-events.y                     |   9
 tools/perf/util/pmu.c                              | 102
 tools/perf/util/pmu.h                              |   9
 tools/perf/util/record.c                           |  62
 tools/perf/util/s390-cpumcf-kernel.h               |   1
 tools/perf/util/s390-cpumsf.c                      |  11
 tools/perf/util/sort.c                             |   2
 tools/perf/util/sort.h                             |   2
 tools/perf/util/stat-shadow.c                      |   2
 tools/perf/util/symbol.c                           |   1
 tools/perf/util/synthetic-events.c                 |  22
 tools/perf/util/thread-stack.c                     |  57
 tools/perf/util/thread-stack.h                     |   3
 tools/perf/util/thread.c                           |  24
 tools/perf/util/thread.h                           |  15
 tools/perf/util/top.h                              |   1
 tools/perf/util/util.c                             |   1
 72 files changed, 1747 insertions(+), 460 deletions(-)
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index 027b18f7ed8c..82f53d81a7a7 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -90,6 +90,7 @@ struct fs {
const char * const *mounts;
char path[PATH_MAX];
bool found;
+ bool checked;
long magic;
};
@@ -111,31 +112,37 @@ static struct fs fs__entries[] = {
.name = "sysfs",
.mounts = sysfs__fs_known_mountpoints,
.magic = SYSFS_MAGIC,
+ .checked = false,
},
[FS__PROCFS] = {
.name = "proc",
.mounts = procfs__known_mountpoints,
.magic = PROC_SUPER_MAGIC,
+ .checked = false,
},
[FS__DEBUGFS] = {
.name = "debugfs",
.mounts = debugfs__known_mountpoints,
.magic = DEBUGFS_MAGIC,
+ .checked = false,
},
[FS__TRACEFS] = {
.name = "tracefs",
.mounts = tracefs__known_mountpoints,
.magic = TRACEFS_MAGIC,
+ .checked = false,
},
[FS__HUGETLBFS] = {
.name = "hugetlbfs",
.mounts = hugetlbfs__known_mountpoints,
.magic = HUGETLBFS_MAGIC,
+ .checked = false,
},
[FS__BPF_FS] = {
.name = "bpf",
.mounts = bpf_fs__known_mountpoints,
.magic = BPF_FS_MAGIC,
+ .checked = false,
},
};
@@ -158,6 +165,7 @@ static bool fs__read_mounts(struct fs *fs)
}
fclose(fp);
+ fs->checked = true;
return fs->found = found;
}
@@ -220,6 +228,7 @@ static bool fs__env_override(struct fs *fs)
return false;
fs->found = true;
+ fs->checked = true;
strncpy(fs->path, override_path, sizeof(fs->path) - 1);
fs->path[sizeof(fs->path) - 1] = '\0';
return true;
@@ -246,6 +255,14 @@ static const char *fs__mountpoint(int idx)
if (fs->found)
return (const char *)fs->path;
+	/*
+	 * The mount point was already checked and did not exist, so return
+	 * NULL to avoid scanning again. This makes the found and not-found
+	 * paths cost equivalent in case of multiple calls.
+	 */
+ if (fs->checked)
+ return NULL;
+
return fs__get_mountpoint(fs);
}
diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h
index 936edb95e1f3..aa222ca30311 100644
--- a/tools/lib/api/fs/fs.h
+++ b/tools/lib/api/fs/fs.h
@@ -18,6 +18,18 @@
const char *name##__mount(void); \
bool name##__configured(void); \
+/*
+ * The xxxx__mountpoint() entry points find the first matching mount point for
+ * each of the filesystems listed below, where xxxx is the filesystem type.
+ *
+ * The interface is as follows:
+ *
+ * - If a mount point is found on first call, it is cached and used for all
+ * subsequent calls.
+ *
+ * - If a mount point is not found, NULL is returned on first call and all
+ * subsequent calls.
+ */
FS(sysfs)
FS(procfs)
FS(debugfs)
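As a caller-side sketch of the interface documented above (a hypothetical
snippet; sysfs__mountpoint() is one of the entry points the FS() macro
declares, and the include path is assumed):

	#include <stdio.h>
	#include <api/fs/fs.h>	/* declares sysfs__mountpoint() via FS() */

	/* Hypothetical caller: relies on the cached lookup described above. */
	static int report_sysfs_mount(void)
	{
		const char *mnt = sysfs__mountpoint();

		if (!mnt)	/* NULL is cached too; later calls stay cheap */
			return -1;
		printf("sysfs mounted at %s\n", mnt);	/* e.g. "/sys" */
		return 0;
	}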
diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c
index 20eed719542e..c271aeeb227d 100644
--- a/tools/lib/traceevent/parse-filter.c
+++ b/tools/lib/traceevent/parse-filter.c
@@ -1958,7 +1958,8 @@ static char *op_to_str(struct tep_event_filter *filter, struct tep_filter_arg *a
default:
break;
}
- asprintf(&str, val ? "TRUE" : "FALSE");
+ if (asprintf(&str, val ? "TRUE" : "FALSE") < 0)
+ str = NULL;
break;
}
}
@@ -1976,7 +1977,8 @@ static char *op_to_str(struct tep_event_filter *filter, struct tep_filter_arg *a
break;
}
- asprintf(&str, "(%s) %s (%s)", left, op, right);
+ if (asprintf(&str, "(%s) %s (%s)", left, op, right) < 0)
+ str = NULL;
break;
case TEP_FILTER_OP_NOT:
@@ -1992,10 +1994,12 @@ static char *op_to_str(struct tep_event_filter *filter, struct tep_filter_arg *a
right_val = 0;
if (right_val >= 0) {
/* just return the opposite */
- asprintf(&str, right_val ? "FALSE" : "TRUE");
+ if (asprintf(&str, right_val ? "FALSE" : "TRUE") < 0)
+ str = NULL;
break;
}
- asprintf(&str, "%s(%s)", op, right);
+ if (asprintf(&str, "%s(%s)", op, right) < 0)
+ str = NULL;
break;
default:
@@ -2011,7 +2015,8 @@ static char *val_to_str(struct tep_event_filter *filter, struct tep_filter_arg *
{
char *str = NULL;
- asprintf(&str, "%lld", arg->value.val);
+ if (asprintf(&str, "%lld", arg->value.val) < 0)
+ str = NULL;
return str;
}
@@ -2069,7 +2074,8 @@ static char *exp_to_str(struct tep_event_filter *filter, struct tep_filter_arg *
break;
}
- asprintf(&str, "%s %s %s", lstr, op, rstr);
+ if (asprintf(&str, "%s %s %s", lstr, op, rstr) < 0)
+ str = NULL;
out:
free(lstr);
free(rstr);
@@ -2113,7 +2119,8 @@ static char *num_to_str(struct tep_event_filter *filter, struct tep_filter_arg *
if (!op)
op = "<=";
- asprintf(&str, "%s %s %s", lstr, op, rstr);
+ if (asprintf(&str, "%s %s %s", lstr, op, rstr) < 0)
+ str = NULL;
break;
default:
@@ -2148,8 +2155,9 @@ static char *str_to_str(struct tep_event_filter *filter, struct tep_filter_arg *
if (!op)
op = "!~";
- asprintf(&str, "%s %s \"%s\"",
- arg->str.field->name, op, arg->str.val);
+ if (asprintf(&str, "%s %s \"%s\"",
+ arg->str.field->name, op, arg->str.val) < 0)
+ str = NULL;
break;
default:
@@ -2165,7 +2173,8 @@ static char *arg_to_str(struct tep_event_filter *filter, struct tep_filter_arg *
switch (arg->type) {
case TEP_FILTER_ARG_BOOLEAN:
- asprintf(&str, arg->boolean.value ? "TRUE" : "FALSE");
+ if (asprintf(&str, arg->boolean.value ? "TRUE" : "FALSE") < 0)
+ str = NULL;
return str;
case TEP_FILTER_ARG_OP:
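All of the hunks above apply the same guard: asprintf() leaves its output
pointer undefined on failure, so the result must only be used when the call
returns >= 0. The checks could equally be factored into a helper; a
hypothetical sketch, not part of the patch:

	#define _GNU_SOURCE
	#include <stdarg.h>
	#include <stddef.h>
	#include <stdio.h>

	/* Returns a heap-allocated string, or NULL if vasprintf() failed. */
	static char *safe_asprintf(const char *fmt, ...)
	{
		va_list ap;
		char *str;
		int ret;

		va_start(ap, fmt);
		ret = vasprintf(&str, fmt, ap);
		va_end(ap);

		return ret < 0 ? NULL : str;
	}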
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index 31824d5269cc..6e54979c2124 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -48,7 +48,7 @@ man5dir=$(mandir)/man5
man7dir=$(mandir)/man7
ASCIIDOC=asciidoc
-ASCIIDOC_EXTRA = --unsafe -f asciidoc.conf
+ASCIIDOC_EXTRA += --unsafe -f asciidoc.conf
ASCIIDOC_HTML = xhtml11
MANPAGE_XSL = manpage-normal.xsl
XMLTO_EXTRA =
@@ -59,7 +59,7 @@ HTML_REF = origin/html
ifdef USE_ASCIIDOCTOR
ASCIIDOC = asciidoctor
-ASCIIDOC_EXTRA = -a compat-mode
+ASCIIDOC_EXTRA += -a compat-mode
ASCIIDOC_EXTRA += -I. -rasciidoctor-extensions
ASCIIDOC_EXTRA += -a mansource="perf" -a manmanual="perf Manual"
ASCIIDOC_HTML = xhtml5
diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
index 82ff7dad40c2..671e154ede03 100644
--- a/tools/perf/Documentation/itrace.txt
+++ b/tools/perf/Documentation/itrace.txt
@@ -10,6 +10,7 @@
e synthesize error events
d create a debug log
g synthesize a call chain (use with i or x)
+ G synthesize a call chain on existing event records
l synthesize last branch entries (use with i or x)
s skip initial number of events
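As a usage sketch of the new flag (assuming a perf.data file that contains an
AUX area trace such as Intel PT), 'G' adds synthesized call chains to the
event records that were already captured:

  perf report --itrace=G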
diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
index 0921a3c67381..bad16512c48d 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -61,6 +61,9 @@ SUBSYSTEM
'epoll'::
Eventpoll (epoll) stressing benchmarks.
+'internals'::
+ Benchmark internal perf functionality.
+
'all'::
All benchmark subsystems.
@@ -214,6 +217,11 @@ Suite for evaluating concurrent epoll_wait calls.
*ctl*::
Suite for evaluating multiple epoll_ctl calls.
+SUITES FOR 'internals'
+~~~~~~~~~~~~~~~~~~~~~~
+*synthesize*::
+Suite for evaluating perf's event synthesis performance.
+
SEE ALSO
--------
linkperf:perf[1]
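A usage sketch for the new suite; the -i count below is the benchmark's own
--iterations option from tools/perf/bench/synthesize.c:

  perf bench internals synthesize
  perf bench internals synthesize -i 1000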
diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt
index e6150f21267d..2133eb320cb0 100644
--- a/tools/perf/Documentation/perf-c2c.txt
+++ b/tools/perf/Documentation/perf-c2c.txt
@@ -111,6 +111,17 @@ REPORT OPTIONS
--display::
Switch to HITM type (rmt, lcl) to display and sort on. Total HITMs as default.
+--stitch-lbr::
+	Show callgraph with stitched LBRs, which may produce a more complete
+	callgraph. The perf.data file must have been obtained using
+	perf c2c record --call-graph lbr.
+	Disabled by default. In common cases with call stack overflows,
+	it can recreate better call stacks than the default LBR call stack
+	output. But this approach is not foolproof: there can be cases
+	where it creates incorrect call stacks from incorrect matches.
+	A known limitation is exception handling; for example,
+	setjmp/longjmp will have calls/returns that do not match.
+
C2C RECORD
----------
The perf c2c record command setup options related to HITM cacheline analysis
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 6345db33c533..376a50b3452d 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -115,6 +115,11 @@ raw encoding of 0x1A8 can be used:
perf stat -e r1a8 -a sleep 1
perf record -e r1a8 ...
+It's also possible to use pmu syntax:
+
+ perf record -e r1a8 -a sleep 1
+ perf record -e cpu/r1a8/ ...
+
You should refer to the processor specific documentation for getting these
details. Some of them are referenced in the SEE ALSO section below.
@@ -258,6 +263,9 @@ Normally all events in an event group sample, but with :S only
the first event (the leader) samples, and it only reads the values of the
other events in the group.
+However, in the case of AUX area events (e.g. Intel PT or CoreSight), the AUX
+area event must be the leader, so it is the second event that samples, not the
+first.
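For example, a group that pairs an Intel PT AUX area event with a sampling
event lists the AUX event first (a sketch; with the :S group modifier it is
then the second event that samples):

  perf record -e '{intel_pt//,cycles}:S' ...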
+
OPTIONS
-------
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index f569b9ea4002..d068103690cc 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -488,6 +488,17 @@ include::itrace.txt[]
This option extends the perf report to show reference callgraphs,
	which are collected by the reference event, for events with no callgraph.
+--stitch-lbr::
+	Show callgraph with stitched LBRs, which may produce a more complete
+	callgraph. The perf.data file must have been obtained using
+	perf record --call-graph lbr.
+	Disabled by default. In common cases with call stack overflows,
+	it can recreate better call stacks than the default LBR call stack
+	output. But this approach is not foolproof: there can be cases
+	where it creates incorrect call stacks from incorrect matches.
+	A known limitation is exception handling; for example,
+	setjmp/longjmp will have calls/returns that do not match.
+
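For example (a usage sketch; the record step follows the requirement stated
above, and ./workload stands in for any command):

  perf record --call-graph lbr -- ./workload
  perf report --stitch-lbr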
--socket-filter::
Only report the samples on the processor socket that match with this filter
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 963487e82edc..372dfd110e6d 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -440,6 +440,17 @@ include::itrace.txt[]
--show-on-off-events::
Show the --switch-on/off events too.
+--stitch-lbr::
+	Show callgraph with stitched LBRs, which may produce a more complete
+	callgraph. The perf.data file must have been obtained using
+	perf record --call-graph lbr.
+	Disabled by default. In common cases with call stack overflows,
+	it can recreate better call stacks than the default LBR call stack
+	output. But this approach is not foolproof: there can be cases
+	where it creates incorrect call stacks from incorrect matches.
+	A known limitation is exception handling; for example,
+	setjmp/longjmp will have calls/returns that do not match.
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script-perl[1],
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 487737a725e9..20227dabc208 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -319,6 +319,15 @@ Default is to monitor all CPUS.
go straight to the histogram browser, just like 'perf top' with no events
	explicitly specified does.
+--stitch-lbr::
+	Show callgraph with stitched LBRs, which may produce a more complete
+	callgraph. The option must be used with --call-graph lbr recording.
+	Disabled by default. In common cases with call stack overflows,
+	it can recreate better call stacks than the default LBR call stack
+	output. But this approach is not foolproof: there can be cases
+	where it creates incorrect call stacks from incorrect matches.
+	A known limitation is exception handling; for example,
+	setjmp/longjmp will have calls/returns that do not match.
INTERACTIVE PROMPTING KEYS
--------------------------
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index b0152e1095c5..b6472e463284 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -373,6 +373,22 @@ struct {
Indicates that trace contains records of PERF_RECORD_COMPRESSED type
that have perf_events records in compressed form.
+ HEADER_CPU_PMU_CAPS = 28,
+
+	A list of CPU PMU capabilities. The format of the data is as follows.
+
+struct {
+ u32 nr_cpu_pmu_caps;
+ {
+ char name[];
+ char value[];
+ } [nr_cpu_pmu_caps]
+};
+
+
+Example:
+ cpu pmu capabilities: branches=32, max_precise=3, pmu_name=icelake
+
	other bits are reserved and should be ignored for now
HEADER_FEAT_BITS = 256,
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index e4e321b6f883..042827385c87 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -6,9 +6,9 @@ perf-y += futex-wake.o
perf-y += futex-wake-parallel.o
perf-y += futex-requeue.o
perf-y += futex-lock-pi.o
-
perf-y += epoll-wait.o
perf-y += epoll-ctl.o
+perf-y += synthesize.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 4aa6de1aa67d..4d669c803237 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -41,9 +41,9 @@ int bench_futex_wake_parallel(int argc, const char **argv);
int bench_futex_requeue(int argc, const char **argv);
/* pi futexes */
int bench_futex_lock_pi(int argc, const char **argv);
-
int bench_epoll_wait(int argc, const char **argv);
int bench_epoll_ctl(int argc, const char **argv);
+int bench_synthesize(int argc, const char **argv);
#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
diff --git a/tools/perf/bench/synthesize.c b/tools/perf/bench/synthesize.c
new file mode 100644
index 000000000000..6291257bc9c9
--- /dev/null
+++ b/tools/perf/bench/synthesize.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Benchmark synthesis of perf events such as at the start of a 'perf
+ * record'. Synthesis is done on the current process, invoking the 'dummy'
+ * event handlers, which support dump_trace but otherwise do nothing.
+ *
+ * Copyright 2019 Google LLC.
+ */
+#include <stdio.h>
+#include "bench.h"
+#include "../util/debug.h"
+#include "../util/session.h"
+#include "../util/synthetic-events.h"
+#include "../util/target.h"
+#include "../util/thread_map.h"
+#include "../util/tool.h"
+#include <linux/err.h>
+#include <linux/time64.h>
+#include <subcmd/parse-options.h>
+
+static unsigned int iterations = 10000;
+
+static const struct option options[] = {
+ OPT_UINTEGER('i', "iterations", &iterations,
+ "Number of iterations used to compute average"),
+ OPT_END()
+};
+
+static const char *const usage[] = {
+ "perf bench internals synthesize <options>",
+ NULL
+};
+
+
+static int do_synthesize(struct perf_session *session,
+ struct perf_thread_map *threads,
+ struct target *target, bool data_mmap)
+{
+ const unsigned int nr_threads_synthesize = 1;
+ struct timeval start, end, diff;
+ u64 runtime_us;
+ unsigned int i;
+ double average;
+ int err;
+
+ gettimeofday(&start, NULL);
+ for (i = 0; i < iterations; i++) {
+ err = machine__synthesize_threads(&session->machines.host,
+ target, threads, data_mmap,
+ nr_threads_synthesize);
+ if (err)
+ return err;
+ }
+
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &diff);
+ runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+ average = (double)runtime_us/(double)iterations;
+ printf("Average %ssynthesis took: %f usec\n",
+ data_mmap ? "data " : "", average);
+ return 0;
+}
+
+int bench_synthesize(int argc, const char **argv)
+{
+ struct perf_tool tool;
+ struct perf_session *session;
+ struct target target = {
+ .pid = "self",
+ };
+ struct perf_thread_map *threads;
+ int err;
+
+ argc = parse_options(argc, argv, options, usage, 0);
+
+ session = perf_session__new(NULL, false, NULL);
+ if (IS_ERR(session)) {
+ pr_err("Session creation failed.\n");
+ return PTR_ERR(session);
+ }
+ threads = thread_map__new_by_pid(getpid());
+ if (!threads) {
+ pr_err("Thread map creation failed.\n");
+ err = -ENOMEM;
+ goto err_out;
+ }
+ perf_tool__fill_defaults(&tool);
+
+ err = do_synthesize(session, threads, &target, false);
+ if (err)
+ goto err_out;
+
+ err = do_synthesize(session, threads, &target, true);
+
+err_out:
+ if (threads)
+ perf_thread_map__put(threads);
+
+ perf_session__delete(session);
+ return err;
+}
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index c06fe21c8613..11c79a8d85d6 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -76,6 +76,11 @@ static struct bench epoll_benchmarks[] = {
};
#endif // HAVE_EVENTFD
+static struct bench internals_benchmarks[] = {
+ { "synthesize", "Benchmark perf event synthesis", bench_synthesize },
+ { NULL, NULL, NULL }
+};
+
struct collection {
const char *name;
const char *summary;
@@ -92,6 +97,7 @@ static struct collection collections[] = {
#ifdef HAVE_EVENTFD
{"epoll", "Epoll stressing benchmarks", epoll_benchmarks },
#endif
+ { "internals", "Perf-internals benchmarks", internals_benchmarks },
{ "all", "All benchmarks", NULL },
{ NULL, NULL, NULL }
};
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 246ac0b4d54f..0d544c4fb4be 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -95,6 +95,7 @@ struct perf_c2c {
bool use_stdio;
bool stats_only;
bool symbol_full;
+ bool stitch_lbr;
/* HITM shared clines stats */
struct c2c_stats hitm_stats;
@@ -273,6 +274,9 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
return -1;
}
+ if (c2c.stitch_lbr)
+ al.thread->lbr_stitch_enable = true;
+
ret = sample__resolve_callchain(sample, &callchain_cursor, NULL,
evsel, &al, sysctl_perf_event_max_stack);
if (ret)
@@ -2601,6 +2605,12 @@ static int setup_callchain(struct evlist *evlist)
}
}
+ if (c2c.stitch_lbr && (mode != CALLCHAIN_LBR)) {
+ ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
+ "Please apply --call-graph lbr when recording.\n");
+ c2c.stitch_lbr = false;
+ }
+
callchain_param.record_mode = mode;
callchain_param.min_percent = 0;
return 0;
@@ -2752,6 +2762,8 @@ static int perf_c2c__report(int argc, const char **argv)
OPT_STRING('c', "coalesce", &coalesce, "coalesce fields",
"coalesce fields: pid,tid,iaddr,dso"),
OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
+ OPT_BOOLEAN(0, "stitch-lbr", &c2c.stitch_lbr,
+ "Enable LBR callgraph stitching approach"),
OPT_PARENT(c2c_options),
OPT_END()
};
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index d5adc417a4ca..55eda54240fb 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -284,10 +284,11 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
.events = POLLIN,
};
- if (!perf_cap__capable(CAP_SYS_ADMIN)) {
+ if (!(perf_cap__capable(CAP_PERFMON) ||
+ perf_cap__capable(CAP_SYS_ADMIN))) {
pr_err("ftrace only works for %s!\n",
#ifdef HAVE_LIBCAP_SUPPORT
- "users with the SYS_ADMIN capability"
+ "users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
#else
"root"
#endif
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 26d8fc27e427..0c32767b1c56 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -84,6 +84,7 @@ struct report {
bool header_only;
bool nonany_branch_mode;
bool group_set;
+ bool stitch_lbr;
int max_stack;
struct perf_read_values show_threads_values;
struct annotation_options annotation_opts;
@@ -267,6 +268,9 @@ static int process_sample_event(struct perf_tool *tool,
return -1;
}
+ if (rep->stitch_lbr)
+ al.thread->lbr_stitch_enable = true;
+
if (symbol_conf.hide_unresolved && al.sym == NULL)
goto out_put;
@@ -339,6 +343,7 @@ static int report__setup_sample_type(struct report *rep)
bool is_pipe = perf_data__is_pipe(session->data);
if (session->itrace_synth_opts->callchain ||
+ session->itrace_synth_opts->add_callchain ||
(!is_pipe &&
perf_header__has_feat(&session->header, HEADER_AUXTRACE) &&
!session->itrace_synth_opts->set))
@@ -407,6 +412,12 @@ static int report__setup_sample_type(struct report *rep)
callchain_param.record_mode = CALLCHAIN_FP;
}
+ if (rep->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
+ ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
+ "Please apply --call-graph lbr when recording.\n");
+ rep->stitch_lbr = false;
+ }
+
/* ??? handle more cases than just ANY? */
if (!(perf_evlist__combined_branch_type(session->evlist) &
PERF_SAMPLE_BRANCH_ANY))
@@ -1257,6 +1268,8 @@ int cmd_report(int argc, const char **argv)
"Show full source file name path for source lines"),
OPT_BOOLEAN(0, "show-ref-call-graph", &symbol_conf.show_ref_callgraph,
"Show callgraph from reference event"),
+ OPT_BOOLEAN(0, "stitch-lbr", &report.stitch_lbr,
+ "Enable LBR callgraph stitching approach"),
OPT_INTEGER(0, "socket-filter", &report.socket_filter,
"only show processor socket that match with this filter"),
OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace,
@@ -1332,7 +1345,7 @@ int cmd_report(int argc, const char **argv)
if (symbol_conf.cumulate_callchain && !callchain_param.order_set)
callchain_param.order = ORDER_CALLER;
- if (itrace_synth_opts.callchain &&
+ if ((itrace_synth_opts.callchain || itrace_synth_opts.add_callchain) &&
(int)itrace_synth_opts.callchain_sz > report.max_stack)
report.max_stack = itrace_synth_opts.callchain_sz;
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 1f57a7ecdf3d..a2236542900d 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1697,6 +1697,7 @@ struct perf_script {
bool show_cgroup_events;
bool allocated;
bool per_event_dump;
+ bool stitch_lbr;
struct evswitch evswitch;
struct perf_cpu_map *cpus;
struct perf_thread_map *threads;
@@ -1923,6 +1924,9 @@ static void process_event(struct perf_script *script,
if (PRINT_FIELD(IP)) {
struct callchain_cursor *cursor = NULL;
+ if (script->stitch_lbr)
+ al->thread->lbr_stitch_enable = true;
+
if (symbol_conf.use_callchain && sample->callchain &&
thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
sample, NULL, NULL, scripting_max_stack) == 0)
@@ -2040,7 +2044,7 @@ static int cleanup_scripting(void)
static bool filter_cpu(struct perf_sample *sample)
{
- if (cpu_list)
+ if (cpu_list && sample->cpu != (u32)-1)
return !test_bit(sample->cpu, cpu_bitmap);
return false;
}
@@ -2138,41 +2142,59 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
return err;
}
-static int process_comm_event(struct perf_tool *tool,
- union perf_event *event,
- struct perf_sample *sample,
- struct machine *machine)
+static int print_event_with_time(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine,
+ pid_t pid, pid_t tid, u64 timestamp)
{
- struct thread *thread;
struct perf_script *script = container_of(tool, struct perf_script, tool);
struct perf_session *session = script->session;
struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
- int ret = -1;
+ struct thread *thread = NULL;
- thread = machine__findnew_thread(machine, event->comm.pid, event->comm.tid);
- if (thread == NULL) {
- pr_debug("problem processing COMM event, skipping it.\n");
- return -1;
+ if (evsel && !evsel->core.attr.sample_id_all) {
+ sample->cpu = 0;
+ sample->time = timestamp;
+ sample->pid = pid;
+ sample->tid = tid;
}
- if (perf_event__process_comm(tool, event, sample, machine) < 0)
- goto out;
+ if (filter_cpu(sample))
+ return 0;
- if (!evsel->core.attr.sample_id_all) {
- sample->cpu = 0;
- sample->time = 0;
- sample->tid = event->comm.tid;
- sample->pid = event->comm.pid;
- }
- if (!filter_cpu(sample)) {
+ if (tid != -1)
+ thread = machine__findnew_thread(machine, pid, tid);
+
+ if (thread && evsel) {
perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_COMM, stdout);
- perf_event__fprintf(event, stdout);
+ event->header.type, stdout);
}
- ret = 0;
-out:
+
+ perf_event__fprintf(event, stdout);
+
thread__put(thread);
- return ret;
+
+ return 0;
+}
+
+static int print_event(struct perf_tool *tool, union perf_event *event,
+ struct perf_sample *sample, struct machine *machine,
+ pid_t pid, pid_t tid)
+{
+ return print_event_with_time(tool, event, sample, machine, pid, tid, 0);
+}
+
+static int process_comm_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ if (perf_event__process_comm(tool, event, sample, machine) < 0)
+ return -1;
+
+ return print_event(tool, event, sample, machine, event->comm.pid,
+ event->comm.tid);
}
static int process_namespaces_event(struct perf_tool *tool,
@@ -2180,37 +2202,11 @@ static int process_namespaces_event(struct perf_tool *tool,
struct perf_sample *sample,
struct machine *machine)
{
- struct thread *thread;
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
- int ret = -1;
-
- thread = machine__findnew_thread(machine, event->namespaces.pid,
- event->namespaces.tid);
- if (thread == NULL) {
- pr_debug("problem processing NAMESPACES event, skipping it.\n");
- return -1;
- }
-
if (perf_event__process_namespaces(tool, event, sample, machine) < 0)
- goto out;
+ return -1;
- if (!evsel->core.attr.sample_id_all) {
- sample->cpu = 0;
- sample->time = 0;
- sample->tid = event->namespaces.tid;
- sample->pid = event->namespaces.pid;
- }
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_NAMESPACES, stdout);
- perf_event__fprintf(event, stdout);
- }
- ret = 0;
-out:
- thread__put(thread);
- return ret;
+ return print_event(tool, event, sample, machine, event->namespaces.pid,
+ event->namespaces.tid);
}
static int process_cgroup_event(struct perf_tool *tool,
@@ -2218,34 +2214,11 @@ static int process_cgroup_event(struct perf_tool *tool,
struct perf_sample *sample,
struct machine *machine)
{
- struct thread *thread;
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
- int ret = -1;
-
- thread = machine__findnew_thread(machine, sample->pid, sample->tid);
- if (thread == NULL) {
- pr_debug("problem processing CGROUP event, skipping it.\n");
- return -1;
- }
-
if (perf_event__process_cgroup(tool, event, sample, machine) < 0)
- goto out;
+ return -1;
- if (!evsel->core.attr.sample_id_all) {
- sample->cpu = 0;
- sample->time = 0;
- }
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_CGROUP, stdout);
- perf_event__fprintf(event, stdout);
- }
- ret = 0;
-out:
- thread__put(thread);
- return ret;
+ return print_event(tool, event, sample, machine, sample->pid,
+ sample->tid);
}
static int process_fork_event(struct perf_tool *tool,
@@ -2253,69 +2226,24 @@ static int process_fork_event(struct perf_tool *tool,
struct perf_sample *sample,
struct machine *machine)
{
- struct thread *thread;
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
-
if (perf_event__process_fork(tool, event, sample, machine) < 0)
return -1;
- thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid);
- if (thread == NULL) {
- pr_debug("problem processing FORK event, skipping it.\n");
- return -1;
- }
-
- if (!evsel->core.attr.sample_id_all) {
- sample->cpu = 0;
- sample->time = event->fork.time;
- sample->tid = event->fork.tid;
- sample->pid = event->fork.pid;
- }
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_FORK, stdout);
- perf_event__fprintf(event, stdout);
- }
- thread__put(thread);
-
- return 0;
+ return print_event_with_time(tool, event, sample, machine,
+ event->fork.pid, event->fork.tid,
+ event->fork.time);
}
static int process_exit_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
{
- int err = 0;
- struct thread *thread;
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
-
- thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid);
- if (thread == NULL) {
- pr_debug("problem processing EXIT event, skipping it.\n");
+ /* Print before 'exit' deletes anything */
+ if (print_event_with_time(tool, event, sample, machine, event->fork.pid,
+ event->fork.tid, event->fork.time))
return -1;
- }
-
- if (!evsel->core.attr.sample_id_all) {
- sample->cpu = 0;
- sample->time = 0;
- sample->tid = event->fork.tid;
- sample->pid = event->fork.pid;
- }
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_EXIT, stdout);
- perf_event__fprintf(event, stdout);
- }
- if (perf_event__process_exit(tool, event, sample, machine) < 0)
- err = -1;
-
- thread__put(thread);
- return err;
+ return perf_event__process_exit(tool, event, sample, machine);
}
static int process_mmap_event(struct perf_tool *tool,
@@ -2323,33 +2251,11 @@ static int process_mmap_event(struct perf_tool *tool,
struct perf_sample *sample,
struct machine *machine)
{
- struct thread *thread;
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
-
if (perf_event__process_mmap(tool, event, sample, machine) < 0)
return -1;
- thread = machine__findnew_thread(machine, event->mmap.pid, event->mmap.tid);
- if (thread == NULL) {
- pr_debug("problem processing MMAP event, skipping it.\n");
- return -1;
- }
-
- if (!evsel->core.attr.sample_id_all) {
- sample->cpu = 0;
- sample->time = 0;
- sample->tid = event->mmap.tid;
- sample->pid = event->mmap.pid;
- }
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_MMAP, stdout);
- perf_event__fprintf(event, stdout);
- }
- thread__put(thread);
- return 0;
+ return print_event(tool, event, sample, machine, event->mmap.pid,
+ event->mmap.tid);
}
static int process_mmap2_event(struct perf_tool *tool,
@@ -2357,33 +2263,11 @@ static int process_mmap2_event(struct perf_tool *tool,
struct perf_sample *sample,
struct machine *machine)
{
- struct thread *thread;
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
-
if (perf_event__process_mmap2(tool, event, sample, machine) < 0)
return -1;
- thread = machine__findnew_thread(machine, event->mmap2.pid, event->mmap2.tid);
- if (thread == NULL) {
- pr_debug("problem processing MMAP2 event, skipping it.\n");
- return -1;
- }
-
- if (!evsel->core.attr.sample_id_all) {
- sample->cpu = 0;
- sample->time = 0;
- sample->tid = event->mmap2.tid;
- sample->pid = event->mmap2.pid;
- }
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_MMAP2, stdout);
- perf_event__fprintf(event, stdout);
- }
- thread__put(thread);
- return 0;
+ return print_event(tool, event, sample, machine, event->mmap2.pid,
+ event->mmap2.tid);
}
static int process_switch_event(struct perf_tool *tool,
@@ -2391,10 +2275,7 @@ static int process_switch_event(struct perf_tool *tool,
struct perf_sample *sample,
struct machine *machine)
{
- struct thread *thread;
struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
if (perf_event__process_switch(tool, event, sample, machine) < 0)
return -1;
@@ -2405,20 +2286,8 @@ static int process_switch_event(struct perf_tool *tool,
if (!script->show_switch_events)
return 0;
- thread = machine__findnew_thread(machine, sample->pid,
- sample->tid);
- if (thread == NULL) {
- pr_debug("problem processing SWITCH event, skipping it.\n");
- return -1;
- }
-
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_SWITCH, stdout);
- perf_event__fprintf(event, stdout);
- }
- thread__put(thread);
- return 0;
+ return print_event(tool, event, sample, machine, sample->pid,
+ sample->tid);
}
static int
@@ -2427,23 +2296,8 @@ process_lost_event(struct perf_tool *tool,
struct perf_sample *sample,
struct machine *machine)
{
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
- struct thread *thread;
-
- thread = machine__findnew_thread(machine, sample->pid,
- sample->tid);
- if (thread == NULL)
- return -1;
-
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_LOST, stdout);
- perf_event__fprintf(event, stdout);
- }
- thread__put(thread);
- return 0;
+ return print_event(tool, event, sample, machine, sample->pid,
+ sample->tid);
}
static int
@@ -2462,33 +2316,11 @@ process_bpf_events(struct perf_tool *tool __maybe_unused,
struct perf_sample *sample,
struct machine *machine)
{
- struct thread *thread;
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
-
if (machine__process_ksymbol(machine, event, sample) < 0)
return -1;
- if (!evsel->core.attr.sample_id_all) {
- perf_event__fprintf(event, stdout);
- return 0;
- }
-
- thread = machine__findnew_thread(machine, sample->pid, sample->tid);
- if (thread == NULL) {
- pr_debug("problem processing MMAP event, skipping it.\n");
- return -1;
- }
-
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- event->header.type, stdout);
- perf_event__fprintf(event, stdout);
- }
-
- thread__put(thread);
- return 0;
+ return print_event(tool, event, sample, machine, sample->pid,
+ sample->tid);
}
static void sig_handler(int sig __maybe_unused)
@@ -3342,6 +3174,12 @@ static void script__setup_sample_type(struct perf_script *script)
else
callchain_param.record_mode = CALLCHAIN_FP;
}
+
+ if (script->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
+ pr_warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
+ "Please apply --call-graph lbr when recording.\n");
+ script->stitch_lbr = false;
+ }
}
static int process_stat_round_event(struct perf_session *session,
@@ -3653,6 +3491,8 @@ int cmd_script(int argc, const char **argv)
"file", "file saving guest os /proc/kallsyms"),
OPT_STRING(0, "guestmodules", &symbol_conf.default_guest_modules,
"file", "file saving guest os /proc/modules"),
+ OPT_BOOLEAN('\0', "stitch-lbr", &script.stitch_lbr,
+ "Enable LBR callgraph stitching approach"),
OPTS_EVSWITCH(&script.evswitch),
OPT_END()
};
@@ -3709,7 +3549,7 @@ int cmd_script(int argc, const char **argv)
return -1;
}
- if (itrace_synth_opts.callchain &&
+ if ((itrace_synth_opts.callchain || itrace_synth_opts.add_callchain) &&
itrace_synth_opts.callchain_sz > scripting_max_stack)
scripting_max_stack = itrace_synth_opts.callchain_sz;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index ec053dc1e35c..9207b6c45475 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -686,8 +686,11 @@ try_again_reset:
break;
}
}
- if (child_pid != -1)
+ if (child_pid != -1) {
+ if (timeout)
+ kill(child_pid, SIGTERM);
wait4(child_pid, &status, 0, &stat_config.ru_data);
+ }
if (workload_exec_errno) {
const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 289cf83e658a..6b067a5ba1d5 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -33,6 +33,7 @@
#include "util/map.h"
#include "util/mmap.h"
#include "util/session.h"
+#include "util/thread.h"
#include "util/symbol.h"
#include "util/synthetic-events.h"
#include "util/top.h"
@@ -775,6 +776,9 @@ static void perf_event__process_sample(struct perf_tool *tool,
if (machine__resolve(machine, &al, sample) < 0)
return;
+ if (top->stitch_lbr)
+ al.thread->lbr_stitch_enable = true;
+
if (!machine->kptr_restrict_warned &&
symbol_conf.kptr_restrict &&
al.cpumode == PERF_RECORD_MISC_KERNEL) {
@@ -1571,6 +1575,8 @@ int cmd_top(int argc, const char **argv)
"Sort the output by the event at the index n in group. "
"If n is invalid, sort by the first event. "
"WARNING: should be used on grouped events."),
+ OPT_BOOLEAN(0, "stitch-lbr", &top.stitch_lbr,
+ "Enable LBR callgraph stitching approach"),
OPTS_EVSWITCH(&top.evswitch),
OPT_END()
};
@@ -1640,6 +1646,11 @@ int cmd_top(int argc, const char **argv)
}
}
+ if (top.stitch_lbr && !(callchain_param.record_mode == CALLCHAIN_LBR)) {
+ pr_err("Error: --stitch-lbr must be used with --call-graph lbr\n");
+ goto out_delete_evlist;
+ }
+
if (opts->branch_stack && callchain_param.enabled)
symbol_conf.show_branchflag_count = true;
diff --git a/tools/perf/design.txt b/tools/perf/design.txt
index 0453ba26cdbd..a42fab308ff6 100644
--- a/tools/perf/design.txt
+++ b/tools/perf/design.txt
gets scheduled to. Per task counters can be created by any user, for
their own tasks.
A 'pid == -1' and 'cpu == x' counter is a per CPU counter that counts
-all events on CPU-x. Per CPU counters need CAP_SYS_ADMIN privilege.
+all events on CPU-x. Per CPU counters need CAP_PERFMON or CAP_SYS_ADMIN
+privilege.
The 'flags' parameter is currently unused and must be zero.
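As a sketch of the per-CPU case described above (hypothetical snippet;
perf_event_open() has no glibc wrapper, so it is called through syscall(2)):

	#include <linux/perf_event.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* A 'pid == -1, cpu == 0' counter: counts all cycles on CPU 0.
	 * Needs CAP_PERFMON or CAP_SYS_ADMIN, per the text above. */
	static int open_percpu_cycles(void)
	{
		struct perf_event_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_HARDWARE;
		attr.config = PERF_COUNT_HW_CPU_CYCLES;

		/* group_fd == -1; flags == 0, as they must be */
		return syscall(SYS_perf_event_open, &attr, -1, 0, -1, 0);
	}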
diff --git a/tools/perf/scripts/python/bin/flamegraph-record b/tools/perf/scripts/python/bin/flamegraph-record
new file mode 100755
index 000000000000..725d66e71570
--- /dev/null
+++ b/tools/perf/scripts/python/bin/flamegraph-record
@@ -0,0 +1,2 @@
+#!/bin/sh
+perf record -g "$@"
diff --git a/tools/perf/scripts/python/bin/flamegraph-report b/tools/perf/scripts/python/bin/flamegraph-report
new file mode 100755
index 000000000000..b1a79afd903b
--- /dev/null
+++ b/tools/perf/scripts/python/bin/flamegraph-report
@@ -0,0 +1,3 @@
+#!/bin/sh
+# description: create flame graphs
+perf script -s "$PERF_EXEC_PATH"/scripts/python/flamegraph.py -- "$@"
diff --git a/tools/perf/scripts/python/flamegraph.py b/tools/perf/scripts/python/flamegraph.py
new file mode 100755
index 000000000000..61f3be9add6b
--- /dev/null
+++ b/tools/perf/scripts/python/flamegraph.py
@@ -0,0 +1,124 @@
+# flamegraph.py - create flame graphs from perf samples
+# SPDX-License-Identifier: GPL-2.0
+#
+# Usage:
+#
+# perf record -a -g -F 99 sleep 60
+# perf script report flamegraph
+#
+# Combined:
+#
+# perf script flamegraph -a -F 99 sleep 60
+#
+# Written by Andreas Gerstmayr <agerstmayr@redhat.com>
+# Flame Graphs invented by Brendan Gregg <bgregg@netflix.com>
+# Works in tandem with d3-flame-graph by Martin Spier <mspier@netflix.com>
+
+from __future__ import print_function
+import sys
+import os
+import argparse
+import json
+
+
+class Node:
+ def __init__(self, name, libtype=""):
+ self.name = name
+ self.libtype = libtype
+ self.value = 0
+ self.children = []
+
+ def toJSON(self):
+ return {
+ "n": self.name,
+ "l": self.libtype,
+ "v": self.value,
+ "c": self.children
+ }
+
+
+class FlameGraphCLI:
+ def __init__(self, args):
+ self.args = args
+ self.stack = Node("root")
+
+ if self.args.format == "html" and \
+ not os.path.isfile(self.args.template):
+ print("Flame Graph template {} does not exist. Please install "
+ "the js-d3-flame-graph (RPM) or libjs-d3-flame-graph (deb) "
+ "package, specify an existing flame graph template "
+ "(--template PATH) or another output format "
+ "(--format FORMAT).".format(self.args.template),
+ file=sys.stderr)
+ sys.exit(1)
+
+ def find_or_create_node(self, node, name, dso):
+ libtype = "kernel" if dso == "[kernel.kallsyms]" else ""
+ if name is None:
+ name = "[unknown]"
+
+ for child in node.children:
+ if child.name == name and child.libtype == libtype:
+ return child
+
+ child = Node(name, libtype)
+ node.children.append(child)
+ return child
+
+ def process_event(self, event):
+ node = self.find_or_create_node(self.stack, event["comm"], None)
+ if "callchain" in event:
+ for entry in reversed(event['callchain']):
+ node = self.find_or_create_node(
+ node, entry.get("sym", {}).get("name"), event.get("dso"))
+ else:
+ node = self.find_or_create_node(
+ node, event.get("symbol"), event.get("dso"))
+ node.value += 1
+
+ def trace_end(self):
+ json_str = json.dumps(self.stack, default=lambda x: x.toJSON())
+
+ if self.args.format == "html":
+ try:
+ with open(self.args.template) as f:
+ output_str = f.read().replace("/** @flamegraph_json **/",
+ json_str)
+ except IOError as e:
+ print("Error reading template file: {}".format(e), file=sys.stderr)
+ sys.exit(1)
+ output_fn = self.args.output or "flamegraph.html"
+ else:
+ output_str = json_str
+ output_fn = self.args.output or "stacks.json"
+
+ if output_fn == "-":
+ sys.stdout.write(output_str)
+ else:
+ print("dumping data to {}".format(output_fn))
+ try:
+ with open(output_fn, "w") as out:
+ out.write(output_str)
+ except IOError as e:
+ print("Error writing output file: {}".format(e), file=sys.stderr)
+ sys.exit(1)
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description="Create flame graphs.")
+ parser.add_argument("-f", "--format",
+ default="html", choices=["json", "html"],
+ help="output file format")
+ parser.add_argument("-o", "--output",
+ help="output file name")
+ parser.add_argument("--template",
+ default="/usr/share/d3-flame-graph/d3-flamegraph-base.html",
+ help="path to flamegraph HTML template")
+ parser.add_argument("-i", "--input",
+ help=argparse.SUPPRESS)
+
+ args = parser.parse_args()
+ cli = FlameGraphCLI(args)
+
+ process_event = cli.process_event
+ trace_end = cli.trace_end
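For reference, Node.toJSON() above maps the stack tree onto the compact keys
that d3-flame-graph consumes ("n" name, "l" libtype, "v" value, "c" children);
a hypothetical fragment of the emitted JSON:

  {"n": "root", "l": "", "v": 0, "c": [
      {"n": "swapper", "l": "", "v": 0, "c": [
          {"n": "secondary_startup_64", "l": "kernel", "v": 5, "c": []}]}]}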
diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c
index 28313e59d6f6..ea10fc4412c4 100644
--- a/tools/perf/tests/expr.c
+++ b/tools/perf/tests/expr.c
@@ -6,7 +6,7 @@
#include <string.h>
#include <linux/zalloc.h>
-static int test(struct parse_ctx *ctx, const char *e, double val2)
+static int test(struct expr_parse_ctx *ctx, const char *e, double val2)
{
double val;
@@ -22,7 +22,7 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused)
const char **other;
double val;
int i, ret;
- struct parse_ctx ctx;
+ struct expr_parse_ctx ctx;
int num_other;
expr__ctx_init(&ctx);
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 091c3aeccc27..902bd9d591a0 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -1356,6 +1356,16 @@ static int test__checkevent_complex_name(struct evlist *evlist)
return 0;
}
+static int test__checkevent_raw_pmu(struct evlist *evlist)
+{
+ struct evsel *evsel = evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", 0x1a == evsel->core.attr.config);
+ return 0;
+}
+
static int test__sym_event_slash(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
@@ -1750,7 +1760,12 @@ static struct evlist_test test__events_pmu[] = {
.name = "cpu/name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks',period=0x1,event=0x2/ukp",
.check = test__checkevent_complex_name,
.id = 3,
- }
+ },
+ {
+ .name = "software/r1a/",
+ .check = test__checkevent_raw_pmu,
+ .id = 4,
+ },
};
struct terms_test {
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index f1ea0d61eb5b..9760d58e979a 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1821,6 +1821,24 @@ static int symbol__disassemble_bpf(struct symbol *sym __maybe_unused,
}
#endif // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
+static int
+symbol__disassemble_bpf_image(struct symbol *sym,
+ struct annotate_args *args)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct disasm_line *dl;
+
+ args->offset = -1;
+ args->line = strdup("to be implemented");
+ args->line_nr = 0;
+ dl = disasm_line__new(args);
+ if (dl)
+ annotation_line__add(&dl->al, &notes->src->source);
+
+ free(args->line);
+ return 0;
+}
+
/*
* Possibly create a new version of line with tabs expanded. Returns the
* existing or new line, storage is updated if a new line is allocated. If
@@ -1920,6 +1938,8 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) {
return symbol__disassemble_bpf(sym, args);
+ } else if (dso->binary_type == DSO_BINARY_TYPE__BPF_IMAGE) {
+ return symbol__disassemble_bpf_image(sym, args);
} else if (dso__is_kcore(dso)) {
kce.kcore_filename = symfs_filename;
kce.addr = map__rip_2objdump(map, sym->start);
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 53be12b23ff4..875a0dd540e5 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -176,6 +176,14 @@ static void arm_spe_free(struct perf_session *session)
free(spe);
}
+static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel)
+{
+ struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);
+
+ return evsel->core.attr.type == spe->pmu_type;
+}
+
static const char * const arm_spe_info_fmts[] = {
[ARM_SPE_PMU_TYPE] = " PMU Type %"PRId64"\n",
};
@@ -218,6 +226,7 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
spe->auxtrace.flush_events = arm_spe_flush;
spe->auxtrace.free_events = arm_spe_free_events;
spe->auxtrace.free = arm_spe_free;
+ spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
session->auxtrace = &spe->auxtrace;
arm_spe_print_info(&auxtrace_info->priv[0]);
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 3571ce72ca28..33ad33378a90 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -58,25 +58,6 @@
#include "symbol/kallsyms.h"
#include <internal/lib.h>
-static struct perf_pmu *perf_evsel__find_pmu(struct evsel *evsel)
-{
- struct perf_pmu *pmu = NULL;
-
- while ((pmu = perf_pmu__scan(pmu)) != NULL) {
- if (pmu->type == evsel->core.attr.type)
- break;
- }
-
- return pmu;
-}
-
-static bool perf_evsel__is_aux_event(struct evsel *evsel)
-{
- struct perf_pmu *pmu = perf_evsel__find_pmu(evsel);
-
- return pmu && pmu->auxtrace;
-}
-
/*
* Make a group from 'leader' to 'last', requiring that the events were not
* already grouped to a different leader.
@@ -1234,29 +1215,79 @@ out_free:
return err;
}
+static void unleader_evsel(struct evlist *evlist, struct evsel *leader)
+{
+ struct evsel *new_leader = NULL;
+ struct evsel *evsel;
+
+ /* Find new leader for the group */
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->leader != leader || evsel == leader)
+ continue;
+ if (!new_leader)
+ new_leader = evsel;
+ evsel->leader = new_leader;
+ }
+
+ /* Update group information */
+ if (new_leader) {
+ zfree(&new_leader->group_name);
+ new_leader->group_name = leader->group_name;
+ leader->group_name = NULL;
+
+ new_leader->core.nr_members = leader->core.nr_members - 1;
+ leader->core.nr_members = 1;
+ }
+}
+
+static void unleader_auxtrace(struct perf_session *session)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(session->evlist, evsel) {
+ if (auxtrace__evsel_is_auxtrace(session, evsel) &&
+ perf_evsel__is_group_leader(evsel)) {
+ unleader_evsel(session->evlist, evsel);
+ }
+ }
+}
+
int perf_event__process_auxtrace_info(struct perf_session *session,
union perf_event *event)
{
enum auxtrace_type type = event->auxtrace_info.type;
+ int err;
if (dump_trace)
fprintf(stdout, " type: %u\n", type);
switch (type) {
case PERF_AUXTRACE_INTEL_PT:
- return intel_pt_process_auxtrace_info(event, session);
+ err = intel_pt_process_auxtrace_info(event, session);
+ break;
case PERF_AUXTRACE_INTEL_BTS:
- return intel_bts_process_auxtrace_info(event, session);
+ err = intel_bts_process_auxtrace_info(event, session);
+ break;
case PERF_AUXTRACE_ARM_SPE:
- return arm_spe_process_auxtrace_info(event, session);
+ err = arm_spe_process_auxtrace_info(event, session);
+ break;
case PERF_AUXTRACE_CS_ETM:
- return cs_etm__process_auxtrace_info(event, session);
+ err = cs_etm__process_auxtrace_info(event, session);
+ break;
case PERF_AUXTRACE_S390_CPUMSF:
- return s390_cpumsf_process_auxtrace_info(event, session);
+ err = s390_cpumsf_process_auxtrace_info(event, session);
+ break;
case PERF_AUXTRACE_UNKNOWN:
default:
return -EINVAL;
}
+
+ if (err)
+ return err;
+
+ unleader_auxtrace(session);
+
+ return 0;
}
s64 perf_event__process_auxtrace(struct perf_session *session,
@@ -1412,8 +1443,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
synth_opts->branches = true;
synth_opts->returns = true;
break;
+ case 'G':
case 'g':
- synth_opts->callchain = true;
+ if (p[-1] == 'G')
+ synth_opts->add_callchain = true;
+ else
+ synth_opts->callchain = true;
synth_opts->callchain_sz =
PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
while (*p == ' ' || *p == ',')
@@ -2577,3 +2612,12 @@ void auxtrace__free(struct perf_session *session)
return session->auxtrace->free(session);
}
+
+bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel)
+{
+ if (!session->auxtrace || !session->auxtrace->evsel_is_auxtrace)
+ return false;
+
+ return session->auxtrace->evsel_is_auxtrace(session, evsel);
+}
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index e58ef160b599..dd8a4ff8209e 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -21,6 +21,7 @@
union perf_event;
struct perf_session;
struct evlist;
+struct evsel;
struct perf_tool;
struct mmap;
struct perf_sample;
@@ -73,6 +74,7 @@ enum itrace_period_type {
* @calls: limit branch samples to calls (can be combined with @returns)
* @returns: limit branch samples to returns (can be combined with @calls)
* @callchain: add callchain to 'instructions' events
+ * @add_callchain: add callchain to existing event records
* @thread_stack: feed branches to the thread_stack
* @last_branch: add branch context to 'instruction' events
* @callchain_sz: maximum callchain size
@@ -100,6 +102,7 @@ struct itrace_synth_opts {
bool calls;
bool returns;
bool callchain;
+ bool add_callchain;
bool thread_stack;
bool last_branch;
unsigned int callchain_sz;
@@ -166,6 +169,8 @@ struct auxtrace {
struct perf_tool *tool);
void (*free_events)(struct perf_session *session);
void (*free)(struct perf_session *session);
+ bool (*evsel_is_auxtrace)(struct perf_session *session,
+ struct evsel *evsel);
};
/**
@@ -584,6 +589,8 @@ void auxtrace__dump_auxtrace_sample(struct perf_session *session,
int auxtrace__flush_events(struct perf_session *session, struct perf_tool *tool);
void auxtrace__free_events(struct perf_session *session);
void auxtrace__free(struct perf_session *session);
+bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel);
#define ITRACE_HELP \
" i: synthesize instructions events\n" \
@@ -750,6 +757,13 @@ void auxtrace_index__free(struct list_head *head __maybe_unused)
}
static inline
+bool auxtrace__evsel_is_auxtrace(struct perf_session *session __maybe_unused,
+ struct evsel *evsel __maybe_unused)
+{
+ return false;
+}
+
+static inline
int auxtrace_parse_filters(struct evlist *evlist __maybe_unused)
{
return 0;
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index a3207d900339..0cd41a862952 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -6,6 +6,9 @@
#include <bpf/libbpf.h>
#include <linux/btf.h>
#include <linux/err.h>
+#include <linux/string.h>
+#include <internal/lib.h>
+#include <symbol/kallsyms.h>
#include "bpf-event.h"
#include "debug.h"
#include "dso.h"
@@ -290,11 +293,82 @@ out:
return err ? -1 : 0;
}
+struct kallsyms_parse {
+ union perf_event *event;
+ perf_event__handler_t process;
+ struct machine *machine;
+ struct perf_tool *tool;
+};
+
+static int
+process_bpf_image(char *name, u64 addr, struct kallsyms_parse *data)
+{
+ struct machine *machine = data->machine;
+ union perf_event *event = data->event;
+ struct perf_record_ksymbol *ksymbol;
+ int len;
+
+ ksymbol = &event->ksymbol;
+
+ *ksymbol = (struct perf_record_ksymbol) {
+ .header = {
+ .type = PERF_RECORD_KSYMBOL,
+ .size = offsetof(struct perf_record_ksymbol, name),
+ },
+ .addr = addr,
+ .len = page_size,
+ .ksym_type = PERF_RECORD_KSYMBOL_TYPE_BPF,
+ .flags = 0,
+ };
+
+ len = scnprintf(ksymbol->name, KSYM_NAME_LEN, "%s", name);
+ ksymbol->header.size += PERF_ALIGN(len + 1, sizeof(u64));
+ memset((void *) event + event->header.size, 0, machine->id_hdr_size);
+ event->header.size += machine->id_hdr_size;
+
+ return perf_tool__process_synth_event(data->tool, event, machine,
+ data->process);
+}
+
+static int
+kallsyms_process_symbol(void *data, const char *_name,
+ char type __maybe_unused, u64 start)
+{
+ char disp[KSYM_NAME_LEN];
+ char *module, *name;
+ unsigned long id;
+ int err = 0;
+
+ module = strchr(_name, '\t');
+ if (!module)
+ return 0;
+
+ /* We are going after [bpf] module ... */
+ if (strcmp(module + 1, "[bpf]"))
+ return 0;
+
+ name = memdup(_name, (module - _name) + 1);
+ if (!name)
+ return -ENOMEM;
+
+ name[module - _name] = 0;
+
+ /* .. and only for trampolines and dispatchers */
+ if ((sscanf(name, "bpf_trampoline_%lu", &id) == 1) ||
+ (sscanf(name, "bpf_dispatcher_%s", disp) == 1))
+ err = process_bpf_image(name, start, data);
+
+ free(name);
+ return err;
+}
+
int perf_event__synthesize_bpf_events(struct perf_session *session,
perf_event__handler_t process,
struct machine *machine,
struct record_opts *opts)
{
+ const char *kallsyms_filename = "/proc/kallsyms";
+ struct kallsyms_parse arg;
union perf_event *event;
__u32 id = 0;
int err;
@@ -303,6 +377,8 @@ int perf_event__synthesize_bpf_events(struct perf_session *session,
event = malloc(sizeof(event->bpf) + KSYM_NAME_LEN + machine->id_hdr_size);
if (!event)
return -1;
+
+	/* Synthesize all the bpf programs in the system. */
while (true) {
err = bpf_prog_get_next_id(id, &id);
if (err) {
@@ -335,6 +411,23 @@ int perf_event__synthesize_bpf_events(struct perf_session *session,
break;
}
}
+
+ /* Synthesize all the bpf images - trampolines/dispatchers. */
+ if (symbol_conf.kallsyms_name != NULL)
+ kallsyms_filename = symbol_conf.kallsyms_name;
+
+ arg = (struct kallsyms_parse) {
+ .event = event,
+ .process = process,
+ .machine = machine,
+ .tool = session->tool,
+ };
+
+ if (kallsyms__parse(kallsyms_filename, &arg, kallsyms_process_symbol)) {
+ pr_err("%s: failed to synthesize bpf images: %s\n",
+ __func__, strerror(errno));
+ }
+
free(event);
return err;
}
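For context, /proc/kallsyms tags BPF trampoline and dispatcher symbols with a tab-separated "[bpf]" module suffix, e.g. "ffffffffc0002000 t bpf_trampoline_42	[bpf]" (illustrative address and id). A minimal standalone sketch of the same filter that kallsyms_process_symbol() applies above, with the memdup() step folded away:

	#include <stdio.h>
	#include <string.h>

	static int is_bpf_image_sym(const char *sym)
	{
		unsigned long id;
		char disp[128];
		const char *tab = strchr(sym, '\t');

		/* only symbols from the [bpf] pseudo module ... */
		if (!tab || strcmp(tab + 1, "[bpf]"))
			return 0;
		/* ... and only trampolines and dispatchers */
		return sscanf(sym, "bpf_trampoline_%lu", &id) == 1 ||
		       sscanf(sym, "bpf_dispatcher_%127s", disp) == 1;
	}

	int main(void)
	{
		printf("%d\n", is_bpf_image_sym("bpf_trampoline_42\t[bpf]"));	/* 1 */
		printf("%d\n", is_bpf_image_sym("vfs_read\t[ext4]"));		/* 0 */
		return 0;
	}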
diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
index 154a05cd03af..4d3f02fa223d 100644
--- a/tools/perf/util/branch.h
+++ b/tools/perf/util/branch.h
@@ -15,13 +15,18 @@
#include "event.h"
struct branch_flags {
- u64 mispred:1;
- u64 predicted:1;
- u64 in_tx:1;
- u64 abort:1;
- u64 cycles:16;
- u64 type:4;
- u64 reserved:40;
+ union {
+ u64 value;
+ struct {
+ u64 mispred:1;
+ u64 predicted:1;
+ u64 in_tx:1;
+ u64 abort:1;
+ u64 cycles:16;
+ u64 type:4;
+ u64 reserved:40;
+ };
+ };
};
struct branch_info {
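Wrapping the bit-fields in an anonymous union lets callers copy or compare the whole flag set through a single u64 (flags.value) instead of going field by field; the has_stitched_lbr() hunk in machine.c below depends on exactly that. A self-contained toy illustration of the layout (u64 typedef assumed for the sketch):

	#include <stdio.h>

	typedef unsigned long long u64;

	struct branch_flags {
		union {
			u64 value;
			struct {
				u64 mispred:1;
				u64 predicted:1;
				u64 in_tx:1;
				u64 abort:1;
				u64 cycles:16;
				u64 type:4;
				u64 reserved:40;
			};
		};
	};

	int main(void)
	{
		struct branch_flags a = { .value = 0 }, b;

		a.mispred = 1;
		a.cycles = 100;
		b.value = a.value;			/* copy all fields in one store */
		printf("%d\n", a.value == b.value);	/* 1: one compare, not seven */
		return 0;
	}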
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 706bb7bbe1e1..8f668ee29f25 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -143,6 +143,9 @@ struct callchain_cursor_node {
u64 ip;
struct map_symbol ms;
const char *srcline;
+ /* Indicate valid cursor node for LBR stitch */
+ bool valid;
+
bool branch;
struct branch_flags branch_flags;
u64 branch_from;
@@ -151,6 +154,11 @@ struct callchain_cursor_node {
struct callchain_cursor_node *next;
};
+struct stitch_list {
+ struct list_head node;
+ struct callchain_cursor_node cursor;
+};
+
struct callchain_cursor {
u64 nr;
struct callchain_cursor_node *first;
diff --git a/tools/perf/util/cap.h b/tools/perf/util/cap.h
index 051dc590ceee..ae52878c0b2e 100644
--- a/tools/perf/util/cap.h
+++ b/tools/perf/util/cap.h
@@ -29,4 +29,8 @@ static inline bool perf_cap__capable(int cap __maybe_unused)
#define CAP_SYSLOG 34
#endif
+#ifndef CAP_PERFMON
+#define CAP_PERFMON 38
+#endif
+
#endif /* __PERF_CAP_H */
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 62d2f9b9ce1b..3c802fde4954 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -631,6 +631,16 @@ static void cs_etm__free(struct perf_session *session)
zfree(&aux);
}
+static bool cs_etm__evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel)
+{
+ struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
+ struct cs_etm_auxtrace,
+ auxtrace);
+
+ return evsel->core.attr.type == aux->pmu_type;
+}
+
static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)
{
struct machine *machine;
@@ -2618,6 +2628,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
etm->auxtrace.flush_events = cs_etm__flush_events;
etm->auxtrace.free_events = cs_etm__free_events;
etm->auxtrace.free = cs_etm__free;
+ etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace;
session->auxtrace = &etm->auxtrace;
etm->unknown_thread = thread__new(999999999, 999999999);
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 91f21239608b..f338990e0fe6 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -191,6 +191,7 @@ int dso__read_binary_type_filename(const struct dso *dso,
case DSO_BINARY_TYPE__GUEST_KALLSYMS:
case DSO_BINARY_TYPE__JAVA_JIT:
case DSO_BINARY_TYPE__BPF_PROG_INFO:
+ case DSO_BINARY_TYPE__BPF_IMAGE:
case DSO_BINARY_TYPE__NOT_FOUND:
ret = -1;
break;
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 2db64b79617a..9553a1fd9e8a 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -40,6 +40,7 @@ enum dso_binary_type {
DSO_BINARY_TYPE__GUEST_KCORE,
DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
DSO_BINARY_TYPE__BPF_PROG_INFO,
+ DSO_BINARY_TYPE__BPF_IMAGE,
DSO_BINARY_TYPE__NOT_FOUND,
};
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 7632075a8792..1ab2682d5d2b 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -48,6 +48,7 @@ struct perf_env {
char *cpuid;
unsigned long long total_mem;
unsigned int msr_pmu_type;
+ unsigned int max_branches;
int nr_cmdline;
int nr_sibling_cores;
@@ -57,12 +58,14 @@ struct perf_env {
int nr_memory_nodes;
int nr_pmu_mappings;
int nr_groups;
+ int nr_cpu_pmu_caps;
char *cmdline;
const char **cmdline_argv;
char *sibling_cores;
char *sibling_dies;
char *sibling_threads;
char *pmu_mappings;
+ char *cpu_pmu_caps;
struct cpu_topology_map *cpu;
struct cpu_cache_level *caches;
int caches_cnt;
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 1548237b6558..82d9f9bb8975 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1131,8 +1131,10 @@ bool perf_evlist__valid_read_format(struct evlist *evlist)
u64 sample_type = first->core.attr.sample_type;
evlist__for_each_entry(evlist, pos) {
- if (read_format != pos->core.attr.read_format)
- return false;
+ if (read_format != pos->core.attr.read_format) {
+ pr_debug("Read format differs %#" PRIx64 " vs %#" PRIx64 "\n",
+ read_format, (u64)pos->core.attr.read_format);
+ }
}
/* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index eb880efbce16..6a571d322bb2 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1002,25 +1002,6 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
}
}
- /*
- * Disable sampling for all group members other
- * than leader in case leader 'leads' the sampling.
- */
- if ((leader != evsel) && leader->sample_read) {
- attr->freq = 0;
- attr->sample_freq = 0;
- attr->sample_period = 0;
- attr->write_backward = 0;
-
- /*
- * We don't get sample for slave events, we make them
- * when delivering group leader sample. Set the slave
- * event to follow the master sample_type to ease up
- * report.
- */
- attr->sample_type = leader->core.attr.sample_type;
- }
-
if (opts->no_samples)
attr->sample_freq = 0;
@@ -2136,7 +2117,7 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event,
}
}
- if (evsel__has_callchain(evsel)) {
+ if (type & PERF_SAMPLE_CALLCHAIN) {
const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
OVERFLOW_CHECK_u64(array);
@@ -2446,6 +2427,10 @@ bool perf_evsel__fallback(struct evsel *evsel, int err,
char *new_name;
const char *sep = ":";
+	/* If the event has exclude_user set then don't exclude the kernel. */
+ if (evsel->core.attr.exclude_user)
+ return false;
+
/* Is there already the separator in the name. */
if (strchr(name, '/') ||
strchr(name, ':'))
@@ -2523,14 +2508,14 @@ int perf_evsel__open_strerror(struct evsel *evsel, struct target *target,
"You may not have permission to collect %sstats.\n\n"
"Consider tweaking /proc/sys/kernel/perf_event_paranoid,\n"
"which controls use of the performance events system by\n"
- "unprivileged users (without CAP_SYS_ADMIN).\n\n"
+ "unprivileged users (without CAP_PERFMON or CAP_SYS_ADMIN).\n\n"
"The current value is %d:\n\n"
" -1: Allow use of (almost) all events by all users\n"
" Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK\n"
- ">= 0: Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN\n"
- " Disallow raw tracepoint access by users without CAP_SYS_ADMIN\n"
- ">= 1: Disallow CPU event access by users without CAP_SYS_ADMIN\n"
- ">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN\n\n"
+ ">= 0: Disallow ftrace function tracepoint by users without CAP_PERFMON or CAP_SYS_ADMIN\n"
+	"      Disallow raw tracepoint access by users without CAP_PERFMON or CAP_SYS_ADMIN\n"
+ ">= 1: Disallow CPU event access by users without CAP_PERFMON or CAP_SYS_ADMIN\n"
+ ">= 2: Disallow kernel profiling by users without CAP_PERFMON or CAP_SYS_ADMIN\n\n"
"To make this setting permanent, edit /etc/sysctl.conf too, e.g.:\n\n"
" kernel.perf_event_paranoid = -1\n" ,
target->system_wide ? "system-wide " : "",
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 53187c501ee8..a463bc65b001 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -104,6 +104,14 @@ struct evsel {
perf_evsel__sb_cb_t *cb;
void *data;
} side_band;
+ /*
+ * For reporting purposes, an evsel sample can have a callchain
+ * synthesized from AUX area data. Keep track of synthesized sample
+ * types here. Note, the recorded sample_type cannot be changed because
+ * it is needed to continue to parse events.
+ * See also evsel__has_callchain().
+ */
+ __u64 synth_sample_type;
};
struct perf_missing_features {
@@ -150,6 +158,9 @@ int perf_evsel__object_config(size_t object_size,
int (*init)(struct evsel *evsel),
void (*fini)(struct evsel *evsel));
+struct perf_pmu *perf_evsel__find_pmu(struct evsel *evsel);
+bool perf_evsel__is_aux_event(struct evsel *evsel);
+
struct evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx);
static inline struct evsel *evsel__new(struct perf_event_attr *attr)
@@ -398,7 +409,12 @@ static inline bool perf_evsel__has_branch_hw_idx(const struct evsel *evsel)
static inline bool evsel__has_callchain(const struct evsel *evsel)
{
- return (evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0;
+ /*
+ * For reporting purposes, an evsel sample can have a recorded callchain
+ * or a callchain synthesized from AUX area data.
+ */
+ return evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN ||
+ evsel->synth_sample_type & PERF_SAMPLE_CALLCHAIN;
}
struct perf_env *perf_evsel__env(struct evsel *evsel);
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
index fd192ddf93c1..c3382d58cf40 100644
--- a/tools/perf/util/expr.c
+++ b/tools/perf/util/expr.c
@@ -3,7 +3,6 @@
#include <assert.h>
#include "expr.h"
#include "expr-bison.h"
-#define YY_EXTRA_TYPE int
#include "expr-flex.h"
#ifdef PARSER_DEBUG
@@ -11,7 +10,7 @@ extern int expr_debug;
#endif
/* Caller must make sure id is allocated */
-void expr__add_id(struct parse_ctx *ctx, const char *name, double val)
+void expr__add_id(struct expr_parse_ctx *ctx, const char *name, double val)
{
int idx;
@@ -21,20 +20,23 @@ void expr__add_id(struct parse_ctx *ctx, const char *name, double val)
ctx->ids[idx].val = val;
}
-void expr__ctx_init(struct parse_ctx *ctx)
+void expr__ctx_init(struct expr_parse_ctx *ctx)
{
ctx->num_ids = 0;
}
static int
-__expr__parse(double *val, struct parse_ctx *ctx, const char *expr,
+__expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr,
int start)
{
+ struct expr_scanner_ctx scanner_ctx = {
+ .start_token = start,
+ };
YY_BUFFER_STATE buffer;
void *scanner;
int ret;
- ret = expr_lex_init_extra(start, &scanner);
+ ret = expr_lex_init_extra(&scanner_ctx, &scanner);
if (ret)
return ret;
@@ -52,7 +54,7 @@ __expr__parse(double *val, struct parse_ctx *ctx, const char *expr,
return ret;
}
-int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr)
+int expr__parse(double *final_val, struct expr_parse_ctx *ctx, const char *expr)
{
return __expr__parse(final_val, ctx, expr, EXPR_PARSE) ? -1 : 0;
}
@@ -75,7 +77,7 @@ int expr__find_other(const char *expr, const char *one, const char ***other,
int *num_other)
{
int err, i = 0, j = 0;
- struct parse_ctx ctx;
+ struct expr_parse_ctx ctx;
expr__ctx_init(&ctx);
err = __expr__parse(NULL, &ctx, expr, EXPR_OTHER);
diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h
index 9377538f4097..0938ad166ece 100644
--- a/tools/perf/util/expr.h
+++ b/tools/perf/util/expr.h
@@ -5,19 +5,23 @@
#define EXPR_MAX_OTHER 20
#define MAX_PARSE_ID EXPR_MAX_OTHER
-struct parse_id {
+struct expr_parse_id {
const char *name;
double val;
};
-struct parse_ctx {
+struct expr_parse_ctx {
int num_ids;
- struct parse_id ids[MAX_PARSE_ID];
+ struct expr_parse_id ids[MAX_PARSE_ID];
};
-void expr__ctx_init(struct parse_ctx *ctx);
-void expr__add_id(struct parse_ctx *ctx, const char *id, double val);
-int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr);
+struct expr_scanner_ctx {
+ int start_token;
+};
+
+void expr__ctx_init(struct expr_parse_ctx *ctx);
+void expr__add_id(struct expr_parse_ctx *ctx, const char *id, double val);
+int expr__parse(double *final_val, struct expr_parse_ctx *ctx, const char *expr);
int expr__find_other(const char *expr, const char *one, const char ***other,
int *num_other);
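A usage sketch of the renamed API (hypothetical values; assumes <stdio.h>, linking against the perf util objects, and that the id strings outlive the parse, as the "Caller must make sure id is allocated" comment in expr.c requires):

	struct expr_parse_ctx ctx;
	double result;

	expr__ctx_init(&ctx);
	expr__add_id(&ctx, "instructions", 4000.0);
	expr__add_id(&ctx, "cycles", 8000.0);

	if (expr__parse(&result, &ctx, "instructions / cycles") == 0)
		printf("IPC = %.2f\n", result);	/* 0.50 */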
diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l
index eaad29243c23..2582c2464938 100644
--- a/tools/perf/util/expr.l
+++ b/tools/perf/util/expr.l
@@ -76,13 +76,13 @@ sym [0-9a-zA-Z_\.:@]+
symbol {spec}*{sym}*{spec}*{sym}*
%%
- {
- int start_token;
+ struct expr_scanner_ctx *sctx = expr_get_extra(yyscanner);
- start_token = expr_get_extra(yyscanner);
+ {
+ int start_token = sctx->start_token;
- if (start_token) {
- expr_set_extra(NULL, yyscanner);
+ if (sctx->start_token) {
+ sctx->start_token = 0;
return start_token;
}
}
diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y
index 4720cbe79357..cd17486c1c5d 100644
--- a/tools/perf/util/expr.y
+++ b/tools/perf/util/expr.y
@@ -15,7 +15,7 @@
%define api.pure full
%parse-param { double *final_val }
-%parse-param { struct parse_ctx *ctx }
+%parse-param { struct expr_parse_ctx *ctx }
%parse-param {void *scanner}
%lex-param {void* scanner}
@@ -39,14 +39,14 @@
%{
static void expr_error(double *final_val __maybe_unused,
- struct parse_ctx *ctx __maybe_unused,
+ struct expr_parse_ctx *ctx __maybe_unused,
void *scanner,
const char *s)
{
pr_debug("%s\n", s);
}
-static int lookup_id(struct parse_ctx *ctx, char *id, double *val)
+static int lookup_id(struct expr_parse_ctx *ctx, char *id, double *val)
{
int i;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index acbd046bf95c..28e82da04b7a 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1395,6 +1395,38 @@ static int write_compressed(struct feat_fd *ff __maybe_unused,
return do_write(ff, &(ff->ph->env.comp_mmap_len), sizeof(ff->ph->env.comp_mmap_len));
}
+static int write_cpu_pmu_caps(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ struct perf_pmu *cpu_pmu = perf_pmu__find("cpu");
+ struct perf_pmu_caps *caps = NULL;
+ int nr_caps;
+ int ret;
+
+ if (!cpu_pmu)
+ return -ENOENT;
+
+ nr_caps = perf_pmu__caps_parse(cpu_pmu);
+ if (nr_caps < 0)
+ return nr_caps;
+
+ ret = do_write(ff, &nr_caps, sizeof(nr_caps));
+ if (ret < 0)
+ return ret;
+
+ list_for_each_entry(caps, &cpu_pmu->caps, list) {
+ ret = do_write_string(ff, caps->name);
+ if (ret < 0)
+ return ret;
+
+ ret = do_write_string(ff, caps->value);
+ if (ret < 0)
+ return ret;
+ }
+
+ return ret;
+}
+
static void print_hostname(struct feat_fd *ff, FILE *fp)
{
fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname);
@@ -1809,6 +1841,27 @@ static void print_compressed(struct feat_fd *ff, FILE *fp)
ff->ph->env.comp_level, ff->ph->env.comp_ratio);
}
+static void print_cpu_pmu_caps(struct feat_fd *ff, FILE *fp)
+{
+ const char *delimiter = "# cpu pmu capabilities: ";
+ u32 nr_caps = ff->ph->env.nr_cpu_pmu_caps;
+ char *str;
+
+ if (!nr_caps) {
+ fprintf(fp, "# cpu pmu capabilities: not available\n");
+ return;
+ }
+
+ str = ff->ph->env.cpu_pmu_caps;
+ while (nr_caps--) {
+ fprintf(fp, "%s%s", delimiter, str);
+ delimiter = ", ";
+ str += strlen(str) + 1;
+ }
+
+ fprintf(fp, "\n");
+}
+
static void print_pmu_mappings(struct feat_fd *ff, FILE *fp)
{
const char *delimiter = "# pmu mappings: ";
@@ -2846,6 +2899,60 @@ static int process_compressed(struct feat_fd *ff,
return 0;
}
+static int process_cpu_pmu_caps(struct feat_fd *ff,
+ void *data __maybe_unused)
+{
+ char *name, *value;
+ struct strbuf sb;
+ u32 nr_caps;
+
+ if (do_read_u32(ff, &nr_caps))
+ return -1;
+
+ if (!nr_caps) {
+ pr_debug("cpu pmu capabilities not available\n");
+ return 0;
+ }
+
+ ff->ph->env.nr_cpu_pmu_caps = nr_caps;
+
+ if (strbuf_init(&sb, 128) < 0)
+ return -1;
+
+ while (nr_caps--) {
+ name = do_read_string(ff);
+ if (!name)
+ goto error;
+
+ value = do_read_string(ff);
+ if (!value)
+ goto free_name;
+
+ if (strbuf_addf(&sb, "%s=%s", name, value) < 0)
+ goto free_value;
+
+		/* include a NUL terminator at the end */
+ if (strbuf_add(&sb, "", 1) < 0)
+ goto free_value;
+
+ if (!strcmp(name, "branches"))
+ ff->ph->env.max_branches = atoi(value);
+
+ free(value);
+ free(name);
+ }
+ ff->ph->env.cpu_pmu_caps = strbuf_detach(&sb, NULL);
+ return 0;
+
+free_value:
+ free(value);
+free_name:
+ free(name);
+error:
+ strbuf_release(&sb);
+ return -1;
+}
+
#define FEAT_OPR(n, func, __full_only) \
[HEADER_##n] = { \
.name = __stringify(n), \
@@ -2903,6 +3010,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPR(BPF_PROG_INFO, bpf_prog_info, false),
FEAT_OPR(BPF_BTF, bpf_btf, false),
FEAT_OPR(COMPRESSED, compressed, false),
+ FEAT_OPR(CPU_PMU_CAPS, cpu_pmu_caps, false),
};
struct header_print_data {
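With the feature written and read back, the header dump gains one line produced by print_cpu_pmu_caps() above, along the lines of (illustrative capability names and values):

	# cpu pmu capabilities: branches=32, max_precise=3, pmu_name=skylake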
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 840f95cee349..650bd1c7a99b 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -43,6 +43,7 @@ enum {
HEADER_BPF_PROG_INFO,
HEADER_BPF_BTF,
HEADER_COMPRESSED,
+ HEADER_CPU_PMU_CAPS,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 283a69ff6a3d..c2550dbe7dc3 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1070,6 +1070,20 @@ iter_next_cumulative_entry(struct hist_entry_iter *iter,
return fill_callchain_info(al, node, iter->hide_unresolved);
}
+static bool
+hist_entry__fast__sym_diff(struct hist_entry *left,
+ struct hist_entry *right)
+{
+ struct symbol *sym_l = left->ms.sym;
+ struct symbol *sym_r = right->ms.sym;
+
+ if (!sym_l && !sym_r)
+ return left->ip != right->ip;
+
+ return !!_sort__sym_cmp(sym_l, sym_r);
+}
+
static int
iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
struct addr_location *al)
@@ -1096,6 +1110,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
};
int i;
struct callchain_cursor cursor;
+ bool fast = hists__has(he_tmp.hists, sym);
callchain_cursor_snapshot(&cursor, &callchain_cursor);
@@ -1106,6 +1121,14 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
* It's possible that it has cycles or recursive calls.
*/
for (i = 0; i < iter->curr; i++) {
+ /*
+		 * In most cases there are no duplicate entries in the callchain.
+		 * The symbols usually differ, so do a quick symbol comparison
+		 * first.
+ */
+ if (fast && hist_entry__fast__sym_diff(he_cache[i], &he_tmp))
+ continue;
+
if (hist_entry__cmp(he_cache[i], &he_tmp) == 0) {
/* to avoid calling callback function */
iter->he = NULL;
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index 34cb380d19a3..059e1c805ed0 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -728,6 +728,15 @@ static void intel_bts_free(struct perf_session *session)
free(bts);
}
+static bool intel_bts_evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel)
+{
+ struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+ auxtrace);
+
+ return evsel->core.attr.type == bts->pmu_type;
+}
+
struct intel_bts_synth {
struct perf_tool dummy_tool;
struct perf_session *session;
@@ -883,6 +892,7 @@ int intel_bts_process_auxtrace_info(union perf_event *event,
bts->auxtrace.flush_events = intel_bts_flush;
bts->auxtrace.free_events = intel_bts_free_events;
bts->auxtrace.free = intel_bts_free;
+ bts->auxtrace.evsel_is_auxtrace = intel_bts_evsel_is_auxtrace;
session->auxtrace = &bts->auxtrace;
intel_bts_print_info(&auxtrace_info->priv[0], INTEL_BTS_PMU_TYPE,
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 23c8289c2472..4be7634dccf5 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -124,6 +124,8 @@ struct intel_pt {
struct range *time_ranges;
unsigned int range_cnt;
+
+ struct ip_callchain *chain;
};
enum switch_state {
@@ -868,6 +870,45 @@ static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
pt->tc.time_mult;
}
+static struct ip_callchain *intel_pt_alloc_chain(struct intel_pt *pt)
+{
+ size_t sz = sizeof(struct ip_callchain);
+
+ /* Add 1 to callchain_sz for callchain context */
+ sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64);
+ return zalloc(sz);
+}
+
+static int intel_pt_callchain_init(struct intel_pt *pt)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (!(evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN))
+ evsel->synth_sample_type |= PERF_SAMPLE_CALLCHAIN;
+ }
+
+ pt->chain = intel_pt_alloc_chain(pt);
+ if (!pt->chain)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void intel_pt_add_callchain(struct intel_pt *pt,
+ struct perf_sample *sample)
+{
+ struct thread *thread = machine__findnew_thread(pt->machine,
+ sample->pid,
+ sample->tid);
+
+ thread_stack__sample_late(thread, sample->cpu, pt->chain,
+ pt->synth_opts.callchain_sz + 1, sample->ip,
+ pt->kernel_start);
+
+ sample->callchain = pt->chain;
+}
+
static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
unsigned int queue_nr)
{
@@ -880,11 +921,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
return NULL;
if (pt->synth_opts.callchain) {
- size_t sz = sizeof(struct ip_callchain);
-
- /* Add 1 to callchain_sz for callchain context */
- sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64);
- ptq->chain = zalloc(sz);
+ ptq->chain = intel_pt_alloc_chain(pt);
if (!ptq->chain)
goto out_free;
}
@@ -1680,15 +1717,14 @@ static u64 intel_pt_lbr_flags(u64 info)
union {
struct branch_flags flags;
u64 result;
- } u = {
- .flags = {
- .mispred = !!(info & LBR_INFO_MISPRED),
- .predicted = !(info & LBR_INFO_MISPRED),
- .in_tx = !!(info & LBR_INFO_IN_TX),
- .abort = !!(info & LBR_INFO_ABORT),
- .cycles = info & LBR_INFO_CYCLES,
- }
- };
+ } u;
+
+ u.result = 0;
+ u.flags.mispred = !!(info & LBR_INFO_MISPRED);
+ u.flags.predicted = !(info & LBR_INFO_MISPRED);
+ u.flags.in_tx = !!(info & LBR_INFO_IN_TX);
+ u.flags.abort = !!(info & LBR_INFO_ABORT);
+ u.flags.cycles = info & LBR_INFO_CYCLES;
return u.result;
}
@@ -1992,7 +2028,8 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
if (!(state->type & INTEL_PT_BRANCH))
return 0;
- if (pt->synth_opts.callchain || pt->synth_opts.thread_stack)
+ if (pt->synth_opts.callchain || pt->synth_opts.add_callchain ||
+ pt->synth_opts.thread_stack)
thread_stack__event(ptq->thread, ptq->cpu, ptq->flags, state->from_ip,
state->to_ip, ptq->insn_len,
state->trace_nr);
@@ -2639,6 +2676,11 @@ static int intel_pt_process_event(struct perf_session *session,
if (err)
return err;
+ if (event->header.type == PERF_RECORD_SAMPLE) {
+ if (pt->synth_opts.add_callchain && !sample->callchain)
+ intel_pt_add_callchain(pt, sample);
+ }
+
if (event->header.type == PERF_RECORD_AUX &&
(event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
pt->synth_opts.errors) {
@@ -2710,11 +2752,21 @@ static void intel_pt_free(struct perf_session *session)
session->auxtrace = NULL;
thread__put(pt->unknown_thread);
addr_filters__exit(&pt->filts);
+ zfree(&pt->chain);
zfree(&pt->filter);
zfree(&pt->time_ranges);
free(pt);
}
+static bool intel_pt_evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel)
+{
+ struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+ auxtrace);
+
+ return evsel->core.attr.type == pt->pmu_type;
+}
+
static int intel_pt_process_auxtrace_event(struct perf_session *session,
union perf_event *event,
struct perf_tool *tool __maybe_unused)
@@ -3310,6 +3362,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
pt->auxtrace.flush_events = intel_pt_flush;
pt->auxtrace.free_events = intel_pt_free_events;
pt->auxtrace.free = intel_pt_free;
+ pt->auxtrace.evsel_is_auxtrace = intel_pt_evsel_is_auxtrace;
session->auxtrace = &pt->auxtrace;
if (dump_trace)
@@ -3338,6 +3391,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
!session->itrace_synth_opts->inject) {
pt->synth_opts.branches = false;
pt->synth_opts.callchain = true;
+ pt->synth_opts.add_callchain = true;
}
pt->synth_opts.thread_stack =
session->itrace_synth_opts->thread_stack;
@@ -3370,14 +3424,22 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
pt->branches_filter |= PERF_IP_FLAG_RETURN |
PERF_IP_FLAG_TRACE_BEGIN;
- if (pt->synth_opts.callchain && !symbol_conf.use_callchain) {
+ if ((pt->synth_opts.callchain || pt->synth_opts.add_callchain) &&
+ !symbol_conf.use_callchain) {
symbol_conf.use_callchain = true;
if (callchain_register_param(&callchain_param) < 0) {
symbol_conf.use_callchain = false;
pt->synth_opts.callchain = false;
+ pt->synth_opts.add_callchain = false;
}
}
+ if (pt->synth_opts.add_callchain) {
+ err = intel_pt_callchain_init(pt);
+ if (err)
+ goto err_delete_thread;
+ }
+
err = intel_pt_synth_events(pt, session);
if (err)
goto err_delete_thread;
@@ -3400,6 +3462,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
return 0;
err_delete_thread:
+ zfree(&pt->chain);
thread__zput(pt->unknown_thread);
err_free_queues:
intel_pt_log_disable();
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 97142e9671be..5ac32cabe4e6 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -736,6 +736,12 @@ int machine__process_switch_event(struct machine *machine __maybe_unused,
return 0;
}
+static int is_bpf_image(const char *name)
+{
+	return strncmp(name, "bpf_trampoline_", sizeof("bpf_trampoline_") - 1) == 0 ||
+	       strncmp(name, "bpf_dispatcher_", sizeof("bpf_dispatcher_") - 1) == 0;
+}
+
static int machine__process_ksymbol_register(struct machine *machine,
union perf_event *event,
struct perf_sample *sample __maybe_unused)
@@ -759,6 +765,12 @@ static int machine__process_ksymbol_register(struct machine *machine,
map->start = event->ksymbol.addr;
map->end = map->start + event->ksymbol.len;
maps__insert(&machine->kmaps, map);
+ dso__set_loaded(dso);
+
+ if (is_bpf_image(event->ksymbol.name)) {
+ dso->binary_type = DSO_BINARY_TYPE__BPF_IMAGE;
+ dso__set_long_name(dso, "", false);
+ }
}
sym = symbol__new(map->map_ip(map, map->start),
@@ -2178,6 +2190,303 @@ static int remove_loops(struct branch_entry *l, int nr,
return nr;
}
+static int lbr_callchain_add_kernel_ip(struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct perf_sample *sample,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ u64 branch_from,
+ bool callee, int end)
+{
+ struct ip_callchain *chain = sample->callchain;
+ u8 cpumode = PERF_RECORD_MISC_USER;
+ int err, i;
+
+ if (callee) {
+ for (i = 0; i < end + 1; i++) {
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, chain->ips[i],
+ false, NULL, NULL, branch_from);
+ if (err)
+ return err;
+ }
+ return 0;
+ }
+
+ for (i = end; i >= 0; i--) {
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, chain->ips[i],
+ false, NULL, NULL, branch_from);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static void save_lbr_cursor_node(struct thread *thread,
+ struct callchain_cursor *cursor,
+ int idx)
+{
+ struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+
+ if (!lbr_stitch)
+ return;
+
+ if (cursor->pos == cursor->nr) {
+ lbr_stitch->prev_lbr_cursor[idx].valid = false;
+ return;
+ }
+
+ if (!cursor->curr)
+ cursor->curr = cursor->first;
+ else
+ cursor->curr = cursor->curr->next;
+ memcpy(&lbr_stitch->prev_lbr_cursor[idx], cursor->curr,
+ sizeof(struct callchain_cursor_node));
+
+ lbr_stitch->prev_lbr_cursor[idx].valid = true;
+ cursor->pos++;
+}
+
+static int lbr_callchain_add_lbr_ip(struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct perf_sample *sample,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ u64 *branch_from,
+ bool callee)
+{
+ struct branch_stack *lbr_stack = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
+ u8 cpumode = PERF_RECORD_MISC_USER;
+ int lbr_nr = lbr_stack->nr;
+ struct branch_flags *flags;
+ int err, i;
+ u64 ip;
+
+ /*
+	 * The curr and pos fields are not used while the cursor is being
+	 * written: they are cleared in callchain_cursor_commit() when the
+	 * writing session is closed. Use them here to track the current
+	 * cursor node.
+ */
+ if (thread->lbr_stitch) {
+ cursor->curr = NULL;
+ cursor->pos = cursor->nr;
+ if (cursor->nr) {
+ cursor->curr = cursor->first;
+ for (i = 0; i < (int)(cursor->nr - 1); i++)
+ cursor->curr = cursor->curr->next;
+ }
+ }
+
+ if (callee) {
+ /* Add LBR ip from first entries.to */
+ ip = entries[0].to;
+ flags = &entries[0].flags;
+ *branch_from = entries[0].from;
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, ip,
+ true, flags, NULL,
+ *branch_from);
+ if (err)
+ return err;
+
+ /*
+		 * The number of cursor nodes has increased, so advance the
+		 * current cursor node. There is no need to save the cursor
+		 * node for entry 0, though: it is impossible to stitch the
+		 * whole LBR stack of the previous sample.
+ */
+ if (thread->lbr_stitch && (cursor->pos != cursor->nr)) {
+ if (!cursor->curr)
+ cursor->curr = cursor->first;
+ else
+ cursor->curr = cursor->curr->next;
+ cursor->pos++;
+ }
+
+ /* Add LBR ip from entries.from one by one. */
+ for (i = 0; i < lbr_nr; i++) {
+ ip = entries[i].from;
+ flags = &entries[i].flags;
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, ip,
+ true, flags, NULL,
+ *branch_from);
+ if (err)
+ return err;
+ save_lbr_cursor_node(thread, cursor, i);
+ }
+ return 0;
+ }
+
+ /* Add LBR ip from entries.from one by one. */
+ for (i = lbr_nr - 1; i >= 0; i--) {
+ ip = entries[i].from;
+ flags = &entries[i].flags;
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, ip,
+ true, flags, NULL,
+ *branch_from);
+ if (err)
+ return err;
+ save_lbr_cursor_node(thread, cursor, i);
+ }
+
+ /* Add LBR ip from first entries.to */
+ ip = entries[0].to;
+ flags = &entries[0].flags;
+ *branch_from = entries[0].from;
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, ip,
+ true, flags, NULL,
+ *branch_from);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int lbr_callchain_add_stitched_lbr_ip(struct thread *thread,
+ struct callchain_cursor *cursor)
+{
+ struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+ struct callchain_cursor_node *cnode;
+ struct stitch_list *stitch_node;
+ int err;
+
+ list_for_each_entry(stitch_node, &lbr_stitch->lists, node) {
+ cnode = &stitch_node->cursor;
+
+ err = callchain_cursor_append(cursor, cnode->ip,
+ &cnode->ms,
+ cnode->branch,
+ &cnode->branch_flags,
+ cnode->nr_loop_iter,
+ cnode->iter_cycles,
+ cnode->branch_from,
+ cnode->srcline);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static struct stitch_list *get_stitch_node(struct thread *thread)
+{
+ struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+ struct stitch_list *stitch_node;
+
+ if (!list_empty(&lbr_stitch->free_lists)) {
+ stitch_node = list_first_entry(&lbr_stitch->free_lists,
+ struct stitch_list, node);
+ list_del(&stitch_node->node);
+
+ return stitch_node;
+ }
+
+ return malloc(sizeof(struct stitch_list));
+}
+
+static bool has_stitched_lbr(struct thread *thread,
+ struct perf_sample *cur,
+ struct perf_sample *prev,
+ unsigned int max_lbr,
+ bool callee)
+{
+ struct branch_stack *cur_stack = cur->branch_stack;
+ struct branch_entry *cur_entries = perf_sample__branch_entries(cur);
+ struct branch_stack *prev_stack = prev->branch_stack;
+ struct branch_entry *prev_entries = perf_sample__branch_entries(prev);
+ struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+ int i, j, nr_identical_branches = 0;
+ struct stitch_list *stitch_node;
+ u64 cur_base, distance;
+
+ if (!cur_stack || !prev_stack)
+ return false;
+
+ /* Find the physical index of the base-of-stack for current sample. */
+ cur_base = max_lbr - cur_stack->nr + cur_stack->hw_idx + 1;
+
+ distance = (prev_stack->hw_idx > cur_base) ? (prev_stack->hw_idx - cur_base) :
+ (max_lbr + prev_stack->hw_idx - cur_base);
+ /* Previous sample has shorter stack. Nothing can be stitched. */
+ if (distance + 1 > prev_stack->nr)
+ return false;
+
+ /*
+ * Check if there are identical LBRs between two samples.
+	 * Identical LBRs must have the same from, to and flags values. They
+	 * must also have been saved in the same LBR registers (same physical
+	 * index).
+	 *
+	 * Start from the base-of-stack of the current sample.
+ */
+ for (i = distance, j = cur_stack->nr - 1; (i >= 0) && (j >= 0); i--, j--) {
+ if ((prev_entries[i].from != cur_entries[j].from) ||
+ (prev_entries[i].to != cur_entries[j].to) ||
+ (prev_entries[i].flags.value != cur_entries[j].flags.value))
+ break;
+ nr_identical_branches++;
+ }
+
+ if (!nr_identical_branches)
+ return false;
+
+ /*
+ * Save the LBRs between the base-of-stack of previous sample
+ * and the base-of-stack of current sample into lbr_stitch->lists.
+ * These LBRs will be stitched later.
+ */
+ for (i = prev_stack->nr - 1; i > (int)distance; i--) {
+ if (!lbr_stitch->prev_lbr_cursor[i].valid)
+ continue;
+
+ stitch_node = get_stitch_node(thread);
+ if (!stitch_node)
+ return false;
+
+ memcpy(&stitch_node->cursor, &lbr_stitch->prev_lbr_cursor[i],
+ sizeof(struct callchain_cursor_node));
+
+ if (callee)
+ list_add(&stitch_node->node, &lbr_stitch->lists);
+ else
+ list_add_tail(&stitch_node->node, &lbr_stitch->lists);
+ }
+
+ return true;
+}
+
+static bool alloc_lbr_stitch(struct thread *thread, unsigned int max_lbr)
+{
+ if (thread->lbr_stitch)
+ return true;
+
+ thread->lbr_stitch = zalloc(sizeof(*thread->lbr_stitch));
+ if (!thread->lbr_stitch)
+ goto err;
+
+ thread->lbr_stitch->prev_lbr_cursor = calloc(max_lbr + 1, sizeof(struct callchain_cursor_node));
+ if (!thread->lbr_stitch->prev_lbr_cursor)
+ goto free_lbr_stitch;
+
+ INIT_LIST_HEAD(&thread->lbr_stitch->lists);
+ INIT_LIST_HEAD(&thread->lbr_stitch->free_lists);
+
+ return true;
+
+free_lbr_stitch:
+ zfree(&thread->lbr_stitch);
+err:
+ pr_warning("Failed to allocate space for stitched LBRs. Disable LBR stitch\n");
+	pr_warning("Failed to allocate space for stitched LBRs. Disabling LBR stitching\n");
+ return false;
+}
+
/*
* Resolve LBR callstack chain sample
* Return:
@@ -2190,12 +2499,16 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
struct perf_sample *sample,
struct symbol **parent,
struct addr_location *root_al,
- int max_stack)
+ int max_stack,
+ unsigned int max_lbr)
{
+ bool callee = (callchain_param.order == ORDER_CALLEE);
struct ip_callchain *chain = sample->callchain;
int chain_nr = min(max_stack, (int)chain->nr), i;
- u8 cpumode = PERF_RECORD_MISC_USER;
- u64 ip, branch_from = 0;
+ struct lbr_stitch *lbr_stitch;
+ bool stitched_lbr = false;
+ u64 branch_from = 0;
+ int err;
for (i = 0; i < chain_nr; i++) {
if (chain->ips[i] == PERF_CONTEXT_USER)
@@ -2203,71 +2516,65 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
}
/* LBR only affects the user callchain */
- if (i != chain_nr) {
- struct branch_stack *lbr_stack = sample->branch_stack;
- struct branch_entry *entries = perf_sample__branch_entries(sample);
- int lbr_nr = lbr_stack->nr, j, k;
- bool branch;
- struct branch_flags *flags;
- /*
- * LBR callstack can only get user call chain.
- * The mix_chain_nr is kernel call chain
- * number plus LBR user call chain number.
- * i is kernel call chain number,
- * 1 is PERF_CONTEXT_USER,
- * lbr_nr + 1 is the user call chain number.
- * For details, please refer to the comments
- * in callchain__printf
- */
- int mix_chain_nr = i + 1 + lbr_nr + 1;
+ if (i == chain_nr)
+ return 0;
- for (j = 0; j < mix_chain_nr; j++) {
- int err;
- branch = false;
- flags = NULL;
+ if (thread->lbr_stitch_enable && !sample->no_hw_idx &&
+ (max_lbr > 0) && alloc_lbr_stitch(thread, max_lbr)) {
+ lbr_stitch = thread->lbr_stitch;
- if (callchain_param.order == ORDER_CALLEE) {
- if (j < i + 1)
- ip = chain->ips[j];
- else if (j > i + 1) {
- k = j - i - 2;
- ip = entries[k].from;
- branch = true;
- flags = &entries[k].flags;
- } else {
- ip = entries[0].to;
- branch = true;
- flags = &entries[0].flags;
- branch_from = entries[0].from;
- }
- } else {
- if (j < lbr_nr) {
- k = lbr_nr - j - 1;
- ip = entries[k].from;
- branch = true;
- flags = &entries[k].flags;
- }
- else if (j > lbr_nr)
- ip = chain->ips[i + 1 - (j - lbr_nr)];
- else {
- ip = entries[0].to;
- branch = true;
- flags = &entries[0].flags;
- branch_from = entries[0].from;
- }
- }
+ stitched_lbr = has_stitched_lbr(thread, sample,
+ &lbr_stitch->prev_sample,
+ max_lbr, callee);
- err = add_callchain_ip(thread, cursor, parent,
- root_al, &cpumode, ip,
- branch, flags, NULL,
- branch_from);
+ if (!stitched_lbr && !list_empty(&lbr_stitch->lists)) {
+ list_replace_init(&lbr_stitch->lists,
+ &lbr_stitch->free_lists);
+ }
+ memcpy(&lbr_stitch->prev_sample, sample, sizeof(*sample));
+ }
+
+ if (callee) {
+ /* Add kernel ip */
+ err = lbr_callchain_add_kernel_ip(thread, cursor, sample,
+ parent, root_al, branch_from,
+ true, i);
+ if (err)
+ goto error;
+
+ err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent,
+ root_al, &branch_from, true);
+ if (err)
+ goto error;
+
+ if (stitched_lbr) {
+ err = lbr_callchain_add_stitched_lbr_ip(thread, cursor);
if (err)
- return (err < 0) ? err : 0;
+ goto error;
}
- return 1;
+
+ } else {
+ if (stitched_lbr) {
+ err = lbr_callchain_add_stitched_lbr_ip(thread, cursor);
+ if (err)
+ goto error;
+ }
+ err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent,
+ root_al, &branch_from, false);
+ if (err)
+ goto error;
+
+ /* Add kernel ip */
+ err = lbr_callchain_add_kernel_ip(thread, cursor, sample,
+ parent, root_al, branch_from,
+ false, i);
+ if (err)
+ goto error;
}
+ return 1;
- return 0;
+error:
+ return (err < 0) ? err : 0;
}
static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
@@ -2312,8 +2619,11 @@ static int thread__resolve_callchain_sample(struct thread *thread,
chain_nr = chain->nr;
if (perf_evsel__has_branch_callstack(evsel)) {
+ struct perf_env *env = perf_evsel__env(evsel);
+
err = resolve_lbr_callchain_sample(thread, cursor, sample, parent,
- root_al, max_stack);
+ root_al, max_stack,
+ !env ? 0 : env->max_branches);
if (err)
return (err < 0) ? err : 0;
}
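To make the base-of-stack arithmetic in has_stitched_lbr() concrete, a worked example with assumed numbers (max_lbr = 32 LBR registers):

	/*
	 * Current sample:  nr = 8,  hw_idx = 5
	 *   cur_base = 32 - 8 + 5 + 1 = 30   physical index of its base-of-stack
	 * Previous sample: nr = 16, hw_idx = 5
	 *   hw_idx (5) <= cur_base (30), so the index wrapped around:
	 *   distance = 32 + 5 - 30 = 7
	 * distance + 1 = 8 <= 16 (prev nr), so prev_entries[7..0] are compared
	 * with cur_entries[7..0]; if all 8 match, prev_entries[15..8] - the part
	 * of the deeper previous stack below the current base - are queued on
	 * lbr_stitch->lists for stitching.
	 */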
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 926449a7cdbf..7ad81c8177ea 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -485,6 +485,39 @@ static bool metricgroup__has_constraint(struct pmu_event *pe)
return false;
}
+static int __metricgroup__add_metric(struct strbuf *events,
+ struct list_head *group_list, struct pmu_event *pe)
+{
+ const char **ids;
+ int idnum;
+ struct egroup *eg;
+
+ if (expr__find_other(pe->metric_expr, NULL, &ids, &idnum) < 0)
+ return -EINVAL;
+
+ if (events->len > 0)
+ strbuf_addf(events, ",");
+
+ if (metricgroup__has_constraint(pe))
+ metricgroup__add_metric_non_group(events, ids, idnum);
+ else
+ metricgroup__add_metric_weak_group(events, ids, idnum);
+
+ eg = malloc(sizeof(*eg));
+ if (!eg)
+ return -ENOMEM;
+
+ eg->ids = ids;
+ eg->idnum = idnum;
+ eg->metric_name = pe->metric_name;
+ eg->metric_expr = pe->metric_expr;
+ eg->metric_unit = pe->unit;
+ list_add_tail(&eg->nd, group_list);
+
+ return 0;
+}
+
static int metricgroup__add_metric(const char *metric, struct strbuf *events,
struct list_head *group_list)
{
@@ -504,35 +537,12 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events,
continue;
if (match_metric(pe->metric_group, metric) ||
match_metric(pe->metric_name, metric)) {
- const char **ids;
- int idnum;
- struct egroup *eg;
pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name);
- if (expr__find_other(pe->metric_expr,
- NULL, &ids, &idnum) < 0)
- continue;
- if (events->len > 0)
- strbuf_addf(events, ",");
-
- if (metricgroup__has_constraint(pe))
- metricgroup__add_metric_non_group(events, ids, idnum);
- else
- metricgroup__add_metric_weak_group(events, ids, idnum);
-
- eg = malloc(sizeof(struct egroup));
- if (!eg) {
- ret = -ENOMEM;
+ ret = __metricgroup__add_metric(events, group_list, pe);
+ if (ret == -ENOMEM)
break;
- }
- eg->ids = ids;
- eg->idnum = idnum;
- eg->metric_name = pe->metric_name;
- eg->metric_expr = pe->metric_expr;
- eg->metric_unit = pe->unit;
- list_add_tail(&eg->nd, group_list);
- ret = 0;
}
}
return ret;
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index baa48f28d57d..c589fc42f058 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -286,6 +286,7 @@ no-overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE); }
percore { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); }
aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); }
aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); }
+r{num_raw_hex} { return raw(yyscanner); }
, { return ','; }
"/" { BEGIN(INITIAL); return '/'; }
{name_minus} { return str(yyscanner, PE_NAME); }
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 94f8bcd83582..e879eb257874 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -706,6 +706,15 @@ event_term
}
event_term:
+PE_RAW
+{
+ struct parse_events_term *term;
+
+ ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_CONFIG,
+ NULL, $1, false, &@1, NULL));
+ $$ = term;
+}
+|
PE_NAME '=' PE_NAME
{
struct parse_events_term *term;
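Together with the new r{num_raw_hex} lexer rule above, this grammar rule should let a raw encoding appear directly inside a PMU term list, e.g. something like perf stat -e cpu/r3c/, as shorthand for cpu/config=0x3c/ (illustrative value; the exact accepted spelling depends on the lexer rule).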
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index ef6a63f3d386..d9f89ed18dea 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -18,6 +18,7 @@
#include <regex.h>
#include <perf/cpumap.h>
#include "debug.h"
+#include "evsel.h"
#include "pmu.h"
#include "parse-events.h"
#include "header.h"
@@ -849,6 +850,7 @@ static struct perf_pmu *pmu_lookup(const char *name)
INIT_LIST_HEAD(&pmu->format);
INIT_LIST_HEAD(&pmu->aliases);
+ INIT_LIST_HEAD(&pmu->caps);
list_splice(&format, &pmu->format);
list_splice(&aliases, &pmu->aliases);
list_add_tail(&pmu->list, &pmus);
@@ -884,6 +886,25 @@ struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu)
return NULL;
}
+struct perf_pmu *perf_evsel__find_pmu(struct evsel *evsel)
+{
+ struct perf_pmu *pmu = NULL;
+
+ while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+ if (pmu->type == evsel->core.attr.type)
+ break;
+ }
+
+ return pmu;
+}
+
+bool perf_evsel__is_aux_event(struct evsel *evsel)
+{
+ struct perf_pmu *pmu = perf_evsel__find_pmu(evsel);
+
+ return pmu && pmu->auxtrace;
+}
+
struct perf_pmu *perf_pmu__find(const char *name)
{
struct perf_pmu *pmu;
@@ -1574,3 +1595,84 @@ int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt,
va_end(args);
return ret;
}
+
+static int perf_pmu__new_caps(struct list_head *list, char *name, char *value)
+{
+ struct perf_pmu_caps *caps = zalloc(sizeof(*caps));
+
+ if (!caps)
+ return -ENOMEM;
+
+ caps->name = strdup(name);
+ if (!caps->name)
+ goto free_caps;
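+	/* strndup() below drops the trailing newline that fgets() kept */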
+ caps->value = strndup(value, strlen(value) - 1);
+ if (!caps->value)
+ goto free_name;
+ list_add_tail(&caps->list, list);
+ return 0;
+
+free_name:
+ zfree(caps->name);
+free_caps:
+ free(caps);
+
+ return -ENOMEM;
+}
+
+/*
+ * Read and parse the given PMU capabilities, which are expected under
+ * /sys/bus/event_source/devices/<dev>/caps as sysfs group attributes.
+ * Returns the number of capabilities parsed.
+ */
+int perf_pmu__caps_parse(struct perf_pmu *pmu)
+{
+ struct stat st;
+ char caps_path[PATH_MAX];
+ const char *sysfs = sysfs__mountpoint();
+ DIR *caps_dir;
+ struct dirent *evt_ent;
+ int nr_caps = 0;
+
+ if (!sysfs)
+ return -1;
+
+ snprintf(caps_path, PATH_MAX,
+ "%s" EVENT_SOURCE_DEVICE_PATH "%s/caps", sysfs, pmu->name);
+
+ if (stat(caps_path, &st) < 0)
+ return 0; /* no error if caps does not exist */
+
+ caps_dir = opendir(caps_path);
+ if (!caps_dir)
+ return -EINVAL;
+
+ while ((evt_ent = readdir(caps_dir)) != NULL) {
+ char path[PATH_MAX + NAME_MAX + 1];
+ char *name = evt_ent->d_name;
+ char value[128];
+ FILE *file;
+
+ if (!strcmp(name, ".") || !strcmp(name, ".."))
+ continue;
+
+ snprintf(path, sizeof(path), "%s/%s", caps_path, name);
+
+ file = fopen(path, "r");
+ if (!file)
+ continue;
+
+ if (!fgets(value, sizeof(value), file) ||
+ (perf_pmu__new_caps(&pmu->caps, name, value) < 0)) {
+ fclose(file);
+ continue;
+ }
+
+ nr_caps++;
+ fclose(file);
+ }
+
+ closedir(caps_dir);
+
+ return nr_caps;
+}
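A usage sketch for the new API (assumes the usual perf util includes; error handling trimmed):

	struct perf_pmu *pmu = perf_pmu__find("cpu");
	struct perf_pmu_caps *caps;

	if (pmu && perf_pmu__caps_parse(pmu) > 0) {
		list_for_each_entry(caps, &pmu->caps, list)
			printf("%s=%s\n", caps->name, caps->value);
	}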
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 5fb3f16828df..1edd214b75a5 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -22,6 +22,12 @@ enum {
struct perf_event_attr;
+struct perf_pmu_caps {
+ char *name;
+ char *value;
+ struct list_head list;
+};
+
struct perf_pmu {
char *name;
__u32 type;
@@ -33,6 +39,7 @@ struct perf_pmu {
struct perf_cpu_map *cpus;
struct list_head format; /* HEAD struct perf_pmu_format -> list */
struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */
+ struct list_head caps; /* HEAD struct perf_pmu_caps -> list */
struct list_head list; /* ELEM */
};
@@ -107,4 +114,6 @@ bool pmu_uncore_alias_match(const char *pmu_name, const char *name);
int perf_pmu__convert_scale(const char *scale, char **end, double *sval);
+int perf_pmu__caps_parse(struct perf_pmu *pmu);
+
#endif /* __PMU_H */
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index 7def66168503..6d3e3df6e2a1 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -167,6 +167,64 @@ bool perf_can_aux_sample(void)
return true;
}
+/*
+ * perf_evsel__config_leader_sampling() uses special rules for leader sampling.
+ * However, if the leader is an AUX area event, the event to sample is
+ * assumed to be the next event in the group.
+ */
+static struct evsel *perf_evsel__read_sampler(struct evsel *evsel,
+ struct evlist *evlist)
+{
+ struct evsel *leader = evsel->leader;
+
+ if (perf_evsel__is_aux_event(leader)) {
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->leader == leader && evsel != evsel->leader)
+ return evsel;
+ }
+ }
+
+ return leader;
+}
+
+static void perf_evsel__config_leader_sampling(struct evsel *evsel,
+ struct evlist *evlist)
+{
+ struct perf_event_attr *attr = &evsel->core.attr;
+ struct evsel *leader = evsel->leader;
+ struct evsel *read_sampler;
+
+ if (!leader->sample_read)
+ return;
+
+ read_sampler = perf_evsel__read_sampler(evsel, evlist);
+
+ if (evsel == read_sampler)
+ return;
+
+ /*
+ * Disable sampling for all group members other than the leader in
+ * case the leader 'leads' the sampling, except when the leader is an
+ * AUX area event, in which case the 2nd event in the group is the one
+ * that 'leads' the sampling.
+ */
+ attr->freq = 0;
+ attr->sample_freq = 0;
+ attr->sample_period = 0;
+ attr->write_backward = 0;
+
+ /*
+	 * We don't get samples for slave events; they are synthesized when the
+	 * group leader sample is delivered. Set the slave event to follow the
+	 * master's sample_type to ease up reporting.
+	 * An AUX area event also has sample_type requirements, so include the
+	 * sample type bits from the leader's sample_type to cover that case.
+ */
+ attr->sample_type = read_sampler->core.attr.sample_type |
+ leader->core.attr.sample_type;
+}
+
void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
struct callchain_param *callchain)
{
@@ -193,6 +251,10 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
evsel->core.attr.comm_exec = 1;
}
+ /* Configure leader sampling here now that the sample type is known */
+ evlist__for_each_entry(evlist, evsel)
+ perf_evsel__config_leader_sampling(evsel, evlist);
+
if (opts->full_auxtrace) {
/*
* Need to be able to synthesize and parse selected events with
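In practice this means a sample-read group whose leader is an AUX area event, e.g. something like {intel_pt//,cycles}:S (illustrative), now samples on the cycles member while inheriting the sample_type bits of both that member and the AUX leader.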
diff --git a/tools/perf/util/s390-cpumcf-kernel.h b/tools/perf/util/s390-cpumcf-kernel.h
index d4356030b504..f55ca07f3ca1 100644
--- a/tools/perf/util/s390-cpumcf-kernel.h
+++ b/tools/perf/util/s390-cpumcf-kernel.h
@@ -11,6 +11,7 @@
#define S390_CPUMCF_DIAG_DEF 0xfeef /* Counter diagnostic entry ID */
#define PERF_EVENT_CPUM_CF_DIAG 0xBC000 /* Event: Counter sets */
+#define PERF_EVENT_CPUM_SF_DIAG 0xBD000 /* Event: Combined-sampling */
struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */
unsigned int def:16; /* 0-15 Data Entry Format */
diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c
index 6785cd87aa4d..38a942881d1a 100644
--- a/tools/perf/util/s390-cpumsf.c
+++ b/tools/perf/util/s390-cpumsf.c
@@ -1047,6 +1047,14 @@ static void s390_cpumsf_free(struct perf_session *session)
free(sf);
}
+static bool
+s390_cpumsf_evsel_is_auxtrace(struct perf_session *session __maybe_unused,
+ struct evsel *evsel)
+{
+ return evsel->core.attr.type == PERF_TYPE_RAW &&
+ evsel->core.attr.config == PERF_EVENT_CPUM_SF_DIAG;
+}
+
static int s390_cpumsf_get_type(const char *cpuid)
{
int ret, family = 0;
@@ -1071,7 +1079,7 @@ static bool check_auxtrace_itrace(struct itrace_synth_opts *itops)
itops->pwr_events || itops->errors ||
itops->dont_decode || itops->calls || itops->returns ||
itops->callchain || itops->thread_stack ||
- itops->last_branch;
+ itops->last_branch || itops->add_callchain;
if (!ison)
return true;
pr_err("Unsupported --itrace options specified\n");
@@ -1142,6 +1150,7 @@ int s390_cpumsf_process_auxtrace_info(union perf_event *event,
sf->auxtrace.flush_events = s390_cpumsf_flush;
sf->auxtrace.free_events = s390_cpumsf_free_events;
sf->auxtrace.free = s390_cpumsf_free;
+ sf->auxtrace.evsel_is_auxtrace = s390_cpumsf_evsel_is_auxtrace;
session->auxtrace = &sf->auxtrace;
if (dump_trace)
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index f14cc728c358..dc15ddc18b7d 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -237,7 +237,7 @@ static int64_t _sort__addr_cmp(u64 left_ip, u64 right_ip)
return (int64_t)(right_ip - left_ip);
}
-static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r)
+int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r)
{
if (!sym_l || !sym_r)
return cmp_null(sym_l, sym_r);
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index cfa6ac6f7d06..66d39c4cfe2b 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -311,5 +311,7 @@ int64_t
sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right);
int64_t
sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right);
+int64_t
+_sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r);
char *hist_entry__srcline(struct hist_entry *he);
#endif /* __PERF_SORT_H */
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 03ecb8cd0eec..1ad5c5be7e97 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -729,7 +729,7 @@ static void generic_metric(struct perf_stat_config *config,
struct runtime_stat *st)
{
print_metric_t print_metric = out->print_metric;
- struct parse_ctx pctx;
+ struct expr_parse_ctx pctx;
double ratio, scale;
int i;
void *ctxp = out->ctx;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 26bc6a0096ce..8f4300492dc7 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1544,6 +1544,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod,
return true;
case DSO_BINARY_TYPE__BPF_PROG_INFO:
+ case DSO_BINARY_TYPE__BPF_IMAGE:
case DSO_BINARY_TYPE__NOT_FOUND:
default:
return false;
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index a661b122d9d8..9d4aa951eaa6 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -71,7 +71,6 @@ int perf_tool__process_synth_event(struct perf_tool *tool,
static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len,
pid_t *tgid, pid_t *ppid)
{
- char filename[PATH_MAX];
char bf[4096];
int fd;
size_t size = 0;
@@ -81,11 +80,11 @@ static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len,
*tgid = -1;
*ppid = -1;
- snprintf(filename, sizeof(filename), "/proc/%d/status", pid);
+ snprintf(bf, sizeof(bf), "/proc/%d/status", pid);
- fd = open(filename, O_RDONLY);
+ fd = open(bf, O_RDONLY);
if (fd < 0) {
- pr_debug("couldn't open %s\n", filename);
+ pr_debug("couldn't open %s\n", bf);
return -1;
}
@@ -281,9 +280,9 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
struct machine *machine,
bool mmap_data)
{
- char filename[PATH_MAX];
FILE *fp;
unsigned long long t;
+ char bf[BUFSIZ];
bool truncation = false;
unsigned long long timeout = proc_map_timeout * 1000000ULL;
int rc = 0;
@@ -293,15 +292,15 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
if (machine__is_default_guest(machine))
return 0;
- snprintf(filename, sizeof(filename), "%s/proc/%d/task/%d/maps",
- machine->root_dir, pid, pid);
+ snprintf(bf, sizeof(bf), "%s/proc/%d/task/%d/maps",
+ machine->root_dir, pid, pid);
- fp = fopen(filename, "r");
+ fp = fopen(bf, "r");
if (fp == NULL) {
/*
* We raced with a task exiting - just return:
*/
- pr_debug("couldn't open %s\n", filename);
+ pr_debug("couldn't open %s\n", bf);
return -1;
}
@@ -309,7 +308,6 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
t = rdclock();
while (1) {
- char bf[BUFSIZ];
char prot[5];
char execname[PATH_MAX];
char anonstr[] = "//anon";
@@ -321,10 +319,10 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
break;
if ((rdclock() - t) > timeout) {
- pr_warning("Reading %s time out. "
+			pr_warning("Reading %s/proc/%d/task/%d/maps timed out. "
"You may want to increase "
"the time limit by --proc-map-timeout\n",
- filename);
+ machine->root_dir, pid, pid);
truncation = true;
goto out;
}
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index 0885967d5bc3..83f6c83f5617 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -497,6 +497,63 @@ void thread_stack__sample(struct thread *thread, int cpu,
chain->nr = i;
}
+/*
+ * Hardware sample records, created some time after the event occurred, need to
+ * have subsequent addresses removed from the call chain.
+ */
+void thread_stack__sample_late(struct thread *thread, int cpu,
+ struct ip_callchain *chain, size_t sz,
+ u64 sample_ip, u64 kernel_start)
+{
+ struct thread_stack *ts = thread__stack(thread, cpu);
+ u64 sample_context = callchain_context(sample_ip, kernel_start);
+ u64 last_context, context, ip;
+ size_t nr = 0, j;
+
+ if (sz < 2) {
+ chain->nr = 0;
+ return;
+ }
+
+ if (!ts)
+ goto out;
+
+ /*
+ * When tracing kernel space, kernel addresses occur at the top of the
+ * call chain after the event occurred but before tracing stopped.
+ * Skip them.
+ */
+ for (j = 1; j <= ts->cnt; j++) {
+ ip = ts->stack[ts->cnt - j].ret_addr;
+ context = callchain_context(ip, kernel_start);
+ if (context == PERF_CONTEXT_USER ||
+ (context == sample_context && ip == sample_ip))
+ break;
+ }
+
+ last_context = sample_ip; /* Use sample_ip as an invalid context */
+
+ for (; nr < sz && j <= ts->cnt; nr++, j++) {
+ ip = ts->stack[ts->cnt - j].ret_addr;
+ context = callchain_context(ip, kernel_start);
+ if (context != last_context) {
+ if (nr >= sz - 1)
+ break;
+ chain->ips[nr++] = context;
+ last_context = context;
+ }
+ chain->ips[nr] = ip;
+ }
+out:
+ if (nr) {
+ chain->nr = nr;
+ } else {
+ chain->ips[0] = sample_context;
+ chain->ips[1] = sample_ip;
+ chain->nr = 2;
+ }
+}
+
struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data),
void *data)
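When no thread stack exists for the cpu, thread_stack__sample_late() above still returns a minimal two-entry chain rather than an empty one. Schematically, for a user-space sample_ip:

	/* fallback result: chain->nr == 2 */
	chain->ips[0] = PERF_CONTEXT_USER;	/* callchain_context(sample_ip, kernel_start) */
	chain->ips[1] = sample_ip;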
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
index e1ec5a58f1b2..8962ddc4e1ab 100644
--- a/tools/perf/util/thread-stack.h
+++ b/tools/perf/util/thread-stack.h
@@ -85,6 +85,9 @@ int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr);
void thread_stack__sample(struct thread *thread, int cpu, struct ip_callchain *chain,
size_t sz, u64 ip, u64 kernel_start);
+void thread_stack__sample_late(struct thread *thread, int cpu,
+ struct ip_callchain *chain, size_t sz, u64 ip,
+ u64 kernel_start);
int thread_stack__flush(struct thread *thread);
void thread_stack__free(struct thread *thread);
size_t thread_stack__depth(struct thread *thread, int cpu);
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 28b719388028..665e5c0618ed 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -47,6 +47,7 @@ struct thread *thread__new(pid_t pid, pid_t tid)
thread->tid = tid;
thread->ppid = -1;
thread->cpu = -1;
+ thread->lbr_stitch_enable = false;
INIT_LIST_HEAD(&thread->namespaces_list);
INIT_LIST_HEAD(&thread->comm_list);
init_rwsem(&thread->namespaces_lock);
@@ -110,6 +111,7 @@ void thread__delete(struct thread *thread)
exit_rwsem(&thread->namespaces_lock);
exit_rwsem(&thread->comm_lock);
+ thread__free_stitch_list(thread);
free(thread);
}
@@ -452,3 +454,25 @@ int thread__memcpy(struct thread *thread, struct machine *machine,
return dso__data_read_offset(al.map->dso, machine, offset, buf, len);
}
+
+void thread__free_stitch_list(struct thread *thread)
+{
+ struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+ struct stitch_list *pos, *tmp;
+
+ if (!lbr_stitch)
+ return;
+
+ list_for_each_entry_safe(pos, tmp, &lbr_stitch->lists, node) {
+ list_del_init(&pos->node);
+ free(pos);
+ }
+
+ list_for_each_entry_safe(pos, tmp, &lbr_stitch->free_lists, node) {
+ list_del_init(&pos->node);
+ free(pos);
+ }
+
+ zfree(&lbr_stitch->prev_lbr_cursor);
+ zfree(&thread->lbr_stitch);
+}
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 20b96b5d1f15..b066fb30d203 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -13,6 +13,8 @@
#include <strlist.h>
#include <intlist.h>
#include "rwsem.h"
+#include "event.h"
+#include "callchain.h"
struct addr_location;
struct map;
@@ -20,6 +22,13 @@ struct perf_record_namespaces;
struct thread_stack;
struct unwind_libunwind_ops;
+struct lbr_stitch {
+ struct list_head lists;
+ struct list_head free_lists;
+ struct perf_sample prev_sample;
+ struct callchain_cursor_node *prev_lbr_cursor;
+};
+
struct thread {
union {
struct rb_node rb_node;
@@ -46,6 +55,10 @@ struct thread {
struct srccode_state srccode_state;
bool filter;
int filter_entry_depth;
+
+ /* LBR call stack stitch */
+ bool lbr_stitch_enable;
+ struct lbr_stitch *lbr_stitch;
};
struct machine;
@@ -142,4 +155,6 @@ static inline bool thread__is_filtered(struct thread *thread)
return false;
}
+void thread__free_stitch_list(struct thread *thread);
+
#endif /* __PERF_THREAD_H */
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
index f117d4f4821e..45dc84ddff37 100644
--- a/tools/perf/util/top.h
+++ b/tools/perf/util/top.h
@@ -36,6 +36,7 @@ struct perf_top {
bool use_tui, use_stdio;
bool vmlinux_warned;
bool dump_symtab;
+ bool stitch_lbr;
struct hist_entry *sym_filter_entry;
struct evsel *sym_evsel;
struct perf_session *session;
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index d707c9624dd9..37a9492edb3e 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -290,6 +290,7 @@ int perf_event_paranoid(void)
bool perf_event_paranoid_check(int max_level)
{
return perf_cap__capable(CAP_SYS_ADMIN) ||
+ perf_cap__capable(CAP_PERFMON) ||
perf_event_paranoid() <= max_level;
}