summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--tools/perf/Documentation/intel-pt.txt29
-rw-r--r--tools/perf/util/intel-pt.c14
2 files changed, 43 insertions, 0 deletions
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
index a0fbb5d71f7d..be764f9ec769 100644
--- a/tools/perf/Documentation/intel-pt.txt
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -764,3 +764,32 @@ perf inject also accepts the --itrace option in which case tracing data is
removed and replaced with the synthesized events. e.g.
perf inject --itrace -i perf.data -o perf.data.new
+
+Below is an example of using Intel PT with AutoFDO. It requires AutoFDO
+(https://github.com/google/autofdo) and gcc version 5. The bubble
+sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial)
+amended to take the number of elements as a parameter.
+
+ $ gcc-5 -O3 sort.c -o sort_optimized
+ $ ./sort_optimized 30000
+ Bubble sorting array of 30000 elements
+ 2254 ms
+
+ $ cat ~/.perfconfig
+ [intel-pt]
+ mispred-all
+
+ $ perf record -e intel_pt//u ./sort 3000
+ Bubble sorting array of 3000 elements
+ 58 ms
+ [ perf record: Woken up 2 times to write data ]
+ [ perf record: Captured and wrote 3.939 MB perf.data ]
+ $ perf inject -i perf.data -o inj --itrace=i100usle --strip
+ $ ./create_gcov --binary=./sort --profile=inj --gcov=sort.gcov -gcov_version=1
+ $ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo
+ $ ./sort_autofdo 30000
+ Bubble sorting array of 30000 elements
+ 2155 ms
+
+Note there is currently no advantage to using Intel PT instead of LBR, but
+that may change in the future if greater use is made of the data.
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 05e8fcc5188b..03ff072b5993 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -64,6 +64,7 @@ struct intel_pt {
bool data_queued;
bool est_tsc;
bool sync_switch;
+ bool mispred_all;
int have_sched_switch;
u32 pmu_type;
u64 kernel_start;
@@ -943,6 +944,7 @@ static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq)
be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX);
be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX);
/* No support for mispredict */
+ be->flags.mispred = ptq->pt->mispred_all;
if (bs->nr < ptq->pt->synth_opts.last_branch_sz)
bs->nr += 1;
@@ -1967,6 +1969,16 @@ static bool intel_pt_find_switch(struct perf_evlist *evlist)
return false;
}
+static int intel_pt_perf_config(const char *var, const char *value, void *data)
+{ /* perf_config() callback: picks up the "intel-pt.mispred-all" setting */
+ struct intel_pt *pt = data; /* cookie handed to perf_config() by the caller */
+
+ if (!strcmp(var, "intel-pt.mispred-all")) /* i.e. "mispred-all" in the [intel-pt] section */
+ pt->mispred_all = perf_config_bool(var, value); /* later copied into each last-branch entry's mispred flag */
+
+ return 0; /* every other variable is ignored; this callback never reports an error */
+}
+
static const char * const intel_pt_info_fmts[] = {
[INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n",
[INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
@@ -2011,6 +2023,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
if (!pt)
return -ENOMEM;
+ perf_config(intel_pt_perf_config, pt);
+
err = auxtrace_queues__init(&pt->queues);
if (err)
goto err_free;