From 0d8dd67be013727ae57645ecd3ea2c36365d7da8 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 6 Dec 2017 14:45:14 -0800 Subject: perf/headers: Sync new perf_event.h with the tools/include/uapi version perf_event.h is updated in previous patch, this patch applies the same changes to the tools/ version. This is part is put in a separate patch in case the two files are back ported separately. Signed-off-by: Song Liu Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Yonghong Song Reviewed-by: Josef Bacik Acked-by: Alexei Starovoitov Cc: Cc: Cc: Cc: Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171206224518.3598254-5-songliubraving@fb.com Signed-off-by: Ingo Molnar --- tools/include/uapi/linux/perf_event.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index c77c9a2ebbbb..5d49cfc509e7 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -380,10 +380,14 @@ struct perf_event_attr { __u32 bp_type; union { __u64 bp_addr; + __u64 kprobe_func; /* for perf_kprobe */ + __u64 uprobe_path; /* for perf_uprobe */ __u64 config1; /* extension of config */ }; union { __u64 bp_len; + __u64 kprobe_addr; /* when kprobe_func == NULL */ + __u64 probe_offset; /* for perf_[k,u]probe */ __u64 config2; /* extension of config1 */ }; __u64 branch_sample_type; /* enum perf_branch_sample_type */ -- cgit v1.2.3 From c3dec27b7f70a9ad5f777d943d51ecdfcd9824d0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 6 Feb 2018 19:17:58 +0100 Subject: perf record: Put new line after target override warning There's no new-line after target-override warning, now: $ perf record -a --per-thread Warning: SYSTEM/CPU switch overriding PER-THREAD^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.705 MB perf.data (2939 samples) ] with patch: $ perf record -a --per-thread Warning: SYSTEM/CPU switch overriding PER-THREAD ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.705 MB perf.data (2939 samples) ] Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: 16ad2ffb822c ("perf tools: Introduce perf_target__strerror()") Link: http://lkml.kernel.org/r/20180206181813.10943-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index bf4ca749d1ac..907267206973 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1803,7 +1803,7 @@ int cmd_record(int argc, const char **argv) err = target__validate(&rec->opts.target); if (err) { target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); - ui__warning("%s", errbuf); + ui__warning("%s\n", errbuf); } err = target__parse_uid(&rec->opts.target); -- cgit v1.2.3 From 3233b37a71c794e25a0a794185df8d6abd9f277e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 6 Feb 2018 19:17:59 +0100 Subject: perf script: Add --show-round-event to display PERF_RECORD_FINISHED_ROUND Adding --show-round-event to display PERF_RECORD_FINISHED_ROUND events like: # perf script --show-round-events 2>/dev/null yes 8591 [002] 124177.397597: 18 cpu/mem-stores/P: ff... 
yes 8591 [002] 124177.397615: 1 cpu/mem-loads,ldlat=30/P: ff... PERF_RECORD_FINISHED_ROUND perf 10380 [001] 124177.397622: 6 cpu/mem-loads,ldlat=30/P: ff... PERF_RECORD_FINISHED_ROUND swapper 0 [000] 124177.400518: 88 cpu/mem-stores/P: ff... swapper 0 [000] 124177.400521: 88 cpu/mem-stores/P: ff... Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180206181813.10943-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 3 +++ tools/perf/builtin-script.c | 17 +++++++++++++++++ 2 files changed, 20 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 7730c1d2b5d3..36ec0257f8d3 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -303,6 +303,9 @@ OPTIONS --show-lost-events Display lost events i.e. events of type PERF_RECORD_LOST. +--show-round-events + Display finished round events i.e. events of type PERF_RECORD_FINISHED_ROUND. + --demangle:: Demangle symbol names to human readable form. It's enabled by default, disable with --no-demangle. diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ab19a6ee4093..cce926aeb0c0 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1489,6 +1489,7 @@ struct perf_script { bool show_switch_events; bool show_namespace_events; bool show_lost_events; + bool show_round_events; bool allocated; bool per_event_dump; struct cpu_map *cpus; @@ -2104,6 +2105,16 @@ process_lost_event(struct perf_tool *tool, return 0; } +static int +process_finished_round_event(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct ordered_events *oe __maybe_unused) + +{ + perf_event__fprintf(event, stdout); + return 0; +} + static void sig_handler(int sig __maybe_unused) { session_done = 1; @@ -2200,6 +2211,10 @@ static int __cmd_script(struct perf_script *script) script->tool.namespaces = process_namespaces_event; if (script->show_lost_events) script->tool.lost = process_lost_event; + if (script->show_round_events) { + script->tool.ordered_events = false; + script->tool.finished_round = process_finished_round_event; + } if (perf_script__setup_per_event_dump(script)) { pr_err("Couldn't create the per event dump files\n"); @@ -3139,6 +3154,8 @@ int cmd_script(int argc, const char **argv) "Show namespace events (if recorded)"), OPT_BOOLEAN('\0', "show-lost-events", &script.show_lost_events, "Show lost events (if recorded)"), + OPT_BOOLEAN('\0', "show-round-events", &script.show_round_events, + "Show round events (if recorded)"), OPT_BOOLEAN('\0', "per-event-dump", &script.per_event_dump, "Dump trace output to files named by the monitored events"), OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), -- cgit v1.2.3 From 6baddfc6900eca7f6b360c91ff737890ab4f1d55 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 6 Feb 2018 19:18:00 +0100 Subject: tools lib api fs: Add filename__read_xll function Adding filename__read_xll function to be able to read files with hex numbers in, which do not have 0x prefix. 
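For illustration, a minimal usage sketch of the new helper (the file path is hypothetical, and the tools/lib/api include path is assumed to be set up):

  #include <stdio.h>
  #include <api/fs/fs.h>          /* declares filename__read_xll() */

  int main(void)
  {
          unsigned long long val = 0;

          /* Hypothetical file whose contents are a hex number without 0x. */
          if (filename__read_xll("/sys/kernel/some_hex_attr", &val))
                  return 1;

          printf("value: %#llx\n", val);
          return 0;
  }
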
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180206181813.10943-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/fs/fs.c | 29 ++++++++++++++++++++++------- tools/lib/api/fs/fs.h | 1 + 2 files changed, 23 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index b24afc0e6e81..8b0e4a4315bd 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -315,12 +315,8 @@ int filename__read_int(const char *filename, int *value) return err; } -/* - * Parses @value out of @filename with strtoull. - * By using 0 for base, the strtoull detects the - * base automatically (see man strtoull). - */ -int filename__read_ull(const char *filename, unsigned long long *value) +static int filename__read_ull_base(const char *filename, + unsigned long long *value, int base) { char line[64]; int fd = open(filename, O_RDONLY), err = -1; @@ -329,7 +325,7 @@ int filename__read_ull(const char *filename, unsigned long long *value) return -1; if (read(fd, line, sizeof(line)) > 0) { - *value = strtoull(line, NULL, 0); + *value = strtoull(line, NULL, base); if (*value != ULLONG_MAX) err = 0; } @@ -338,6 +334,25 @@ int filename__read_ull(const char *filename, unsigned long long *value) return err; } +/* + * Parses @value out of @filename with strtoull. + * By using 16 for base to treat the number as hex. + */ +int filename__read_xll(const char *filename, unsigned long long *value) +{ + return filename__read_ull_base(filename, value, 16); +} + +/* + * Parses @value out of @filename with strtoull. + * By using 0 for base, the strtoull detects the + * base automatically (see man strtoull). + */ +int filename__read_ull(const char *filename, unsigned long long *value) +{ + return filename__read_ull_base(filename, value, 0); +} + #define STRERR_BUFSIZE 128 /* For the buffer size of strerror_r */ int filename__read_str(const char *filename, char **buf, size_t *sizep) diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h index dda49deefb52..8ebee35a6395 100644 --- a/tools/lib/api/fs/fs.h +++ b/tools/lib/api/fs/fs.h @@ -30,6 +30,7 @@ FS(bpf_fs) int filename__read_int(const char *filename, int *value); int filename__read_ull(const char *filename, unsigned long long *value); +int filename__read_xll(const char *filename, unsigned long long *value); int filename__read_str(const char *filename, char **buf, size_t *sizep); int filename__write_int(const char *filename, int value); -- cgit v1.2.3 From d9c5f32240f503481291a6d4e7246ee0a128d76d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 6 Feb 2018 19:18:01 +0100 Subject: tools lib api fs: Add sysfs__read_xll function Adding sysfs__read_xll function to be able to read sysfs files with hex numbers in, which do not have 0x prefix. 
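An analogous usage sketch for the sysfs variant (the attribute name below is made up; entries are given relative to the sysfs mountpoint, and the tools/lib/api include path is assumed):

  #include <stdio.h>
  #include <api/fs/fs.h>          /* declares sysfs__read_xll() */

  int main(void)
  {
          unsigned long long val = 0;

          /* Hypothetical sysfs attribute holding hex digits without 0x. */
          if (sysfs__read_xll("devices/cpu/some_hex_attr", &val))
                  return 1;

          printf("value: %#llx\n", val);
          return 0;
  }
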
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180206181813.10943-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/fs/fs.c | 15 +++++++++++++-- tools/lib/api/fs/fs.h | 1 + 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index 8b0e4a4315bd..6a12bbf39f7b 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -432,7 +432,8 @@ int procfs__read_str(const char *entry, char **buf, size_t *sizep) return filename__read_str(path, buf, sizep); } -int sysfs__read_ull(const char *entry, unsigned long long *value) +static int sysfs__read_ull_base(const char *entry, + unsigned long long *value, int base) { char path[PATH_MAX]; const char *sysfs = sysfs__mountpoint(); @@ -442,7 +443,17 @@ int sysfs__read_ull(const char *entry, unsigned long long *value) snprintf(path, sizeof(path), "%s/%s", sysfs, entry); - return filename__read_ull(path, value); + return filename__read_ull_base(path, value, base); +} + +int sysfs__read_xll(const char *entry, unsigned long long *value) +{ + return sysfs__read_ull_base(entry, value, 16); +} + +int sysfs__read_ull(const char *entry, unsigned long long *value) +{ + return sysfs__read_ull_base(entry, value, 0); } int sysfs__read_int(const char *entry, int *value) diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h index 8ebee35a6395..92d03b8396b1 100644 --- a/tools/lib/api/fs/fs.h +++ b/tools/lib/api/fs/fs.h @@ -40,6 +40,7 @@ int procfs__read_str(const char *entry, char **buf, size_t *sizep); int sysctl__read_int(const char *sysctl, int *value); int sysfs__read_int(const char *entry, int *value); int sysfs__read_ull(const char *entry, unsigned long long *value); +int sysfs__read_xll(const char *entry, unsigned long long *value); int sysfs__read_str(const char *entry, char **buf, size_t *sizep); int sysfs__read_bool(const char *entry, bool *value); -- cgit v1.2.3 From fdf7c49c200d1b9909e2204cec5bd68b48605c71 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 6 Feb 2018 19:18:12 +0100 Subject: perf tests: Fix dwarf unwind for stripped binaries When we strip the perf binary, dwarf unwind test stop to work. The reason is that strip will remove static function symbols, which we need to check for unwind. This change will keep this test working in cases where the global symbols are put into dynamic symbol table, which is the case on x86. It still won't work on powerpc. Making those 5 local functions global, and adding 'test_dwarf_unwind__' to their names. Committer testing: Before: # perf test dwarf 58: DWARF unwind : Ok # strip ~/bin/perf # perf test dwarf 58: DWARF unwind : FAILED! # perf test -v dwarf 58: DWARF unwind : --- start --- test child forked, pid 6590 unwind: thread map already set, dso=/home/acme/bin/perf unwind: access_mem addr 0x7ffce6c48098 val 48563f, offset 1144 unwind: test__dwarf_unwind:ip = 0x4a54e5 (0xa54e5) got: test__dwarf_unwind 0xa54e5, expecting test__dwarf_unwind unwind: '':ip = 0x4a50bb (0xa50bb) failed: got unresolved address 0xa50bb unwind failed test child finished with -1 ---- end ---- DWARF unwind: FAILED! 
# After: # perf test dwarf 58: DWARF unwind : Ok # strip ~/bin/perf # perf test dwarf 58: DWARF unwind : Ok # # perf test -v dwarf 58: DWARF unwind : --- start --- test child forked, pid 7219 unwind: thread map already set, dso=/home/acme/bin/perf unwind: access_mem addr 0x7fff007da2c8 val 48575f, offset 1144 unwind: test__arch_unwind_sample:ip = 0x589044 (0x189044) got: test__arch_unwind_sample 0x189044, expecting test__arch_unwind_sample unwind: test_dwarf_unwind__thread:ip = 0x4a52f7 (0xa52f7) got: test_dwarf_unwind__thread 0xa52f7, expecting test_dwarf_unwind__thread unwind: test_dwarf_unwind__compare:ip = 0x4a5468 (0xa5468) got: test_dwarf_unwind__compare 0xa5468, expecting test_dwarf_unwind__compare unwind: bsearch:ip = 0x7f6608ae94d8 (0x394d8) got: bsearch 0x394d8, expecting bsearch unwind: test_dwarf_unwind__krava_3:ip = 0x4a54d1 (0xa54d1) got: test_dwarf_unwind__krava_3 0xa54d1, expecting test_dwarf_unwind__krava_3 unwind: test_dwarf_unwind__krava_2:ip = 0x4a550b (0xa550b) got: test_dwarf_unwind__krava_2 0xa550b, expecting test_dwarf_unwind__krava_2 unwind: test_dwarf_unwind__krava_1:ip = 0x4a554b (0xa554b) got: test_dwarf_unwind__krava_1 0xa554b, expecting test_dwarf_unwind__krava_1 unwind: test__dwarf_unwind:ip = 0x4a5605 (0xa5605) got: test__dwarf_unwind 0xa5605, expecting test__dwarf_unwind test child finished with 0 ---- end ---- DWARF unwind: Ok # Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180206181813.10943-17-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/dwarf-unwind.c | 46 +++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 260418969120..2f008067d989 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -37,6 +37,19 @@ static int init_live_machine(struct machine *machine) mmap_handler, machine, true, 500); } +/* + * We need to keep these functions global, despite the + * fact that they are used only locally in this object, + * in order to keep them around even if the binary is + * stripped. If they are gone, the unwind check for + * symbol fails. + */ +int test_dwarf_unwind__thread(struct thread *thread); +int test_dwarf_unwind__compare(void *p1, void *p2); +int test_dwarf_unwind__krava_3(struct thread *thread); +int test_dwarf_unwind__krava_2(struct thread *thread); +int test_dwarf_unwind__krava_1(struct thread *thread); + #define MAX_STACK 8 static int unwind_entry(struct unwind_entry *entry, void *arg) @@ -45,12 +58,12 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) char *symbol = entry->sym ? 
entry->sym->name : NULL; static const char *funcs[MAX_STACK] = { "test__arch_unwind_sample", - "unwind_thread", - "compare", + "test_dwarf_unwind__thread", + "test_dwarf_unwind__compare", "bsearch", - "krava_3", - "krava_2", - "krava_1", + "test_dwarf_unwind__krava_3", + "test_dwarf_unwind__krava_2", + "test_dwarf_unwind__krava_1", "test__dwarf_unwind" }; /* @@ -77,7 +90,7 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) return strcmp((const char *) symbol, funcs[idx]); } -static noinline int unwind_thread(struct thread *thread) +noinline int test_dwarf_unwind__thread(struct thread *thread) { struct perf_sample sample; unsigned long cnt = 0; @@ -108,7 +121,7 @@ static noinline int unwind_thread(struct thread *thread) static int global_unwind_retval = -INT_MAX; -static noinline int compare(void *p1, void *p2) +noinline int test_dwarf_unwind__compare(void *p1, void *p2) { /* Any possible value should be 'thread' */ struct thread *thread = *(struct thread **)p1; @@ -117,17 +130,17 @@ static noinline int compare(void *p1, void *p2) /* Call unwinder twice for both callchain orders. */ callchain_param.order = ORDER_CALLER; - global_unwind_retval = unwind_thread(thread); + global_unwind_retval = test_dwarf_unwind__thread(thread); if (!global_unwind_retval) { callchain_param.order = ORDER_CALLEE; - global_unwind_retval = unwind_thread(thread); + global_unwind_retval = test_dwarf_unwind__thread(thread); } } return p1 - p2; } -static noinline int krava_3(struct thread *thread) +noinline int test_dwarf_unwind__krava_3(struct thread *thread) { struct thread *array[2] = {thread, thread}; void *fp = &bsearch; @@ -141,18 +154,19 @@ static noinline int krava_3(struct thread *thread) size_t, int (*)(void *, void *)); _bsearch = fp; - _bsearch(array, &thread, 2, sizeof(struct thread **), compare); + _bsearch(array, &thread, 2, sizeof(struct thread **), + test_dwarf_unwind__compare); return global_unwind_retval; } -static noinline int krava_2(struct thread *thread) +noinline int test_dwarf_unwind__krava_2(struct thread *thread) { - return krava_3(thread); + return test_dwarf_unwind__krava_3(thread); } -static noinline int krava_1(struct thread *thread) +noinline int test_dwarf_unwind__krava_1(struct thread *thread) { - return krava_2(thread); + return test_dwarf_unwind__krava_2(thread); } int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unused) @@ -189,7 +203,7 @@ int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unu goto out; } - err = krava_1(thread); + err = test_dwarf_unwind__krava_1(thread); thread__put(thread); out: -- cgit v1.2.3 From a7402c943bb4657cc0b44453177803fbead70990 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 6 Feb 2018 19:18:13 +0100 Subject: perf tools: Fix comment for sort__* compare functions In commit 2f15bd8c6c6e ("perf tools: Fix "Command" sort_entry's cmp and collapse function") we switched from pointer to string comparison. But failed to remove related comments. Removing them and adding another one to warn before pointer comparison in here. 
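To illustrate what the new comment warns about, here is a small self-contained sketch (plain C, not perf code): two hist entries can reference different copies of the same comm string, so pointer order depends on where the allocator placed them, while strcmp() gives a stable and meaningful order:

  #include <stdio.h>
  #include <stdint.h>
  #include <stdlib.h>
  #include <string.h>

  int main(void)
  {
          char *a = strdup("perf");
          char *b = strdup("perf");

          /* Pointer comparison: result depends on allocation addresses. */
          printf("by pointer: %d\n",
                 ((uintptr_t)a > (uintptr_t)b) - ((uintptr_t)a < (uintptr_t)b));

          /* String comparison: 0 for equal comms, stable order otherwise. */
          printf("by strcmp : %d\n", strcmp(a, b));

          free(a);
          free(b);
          return 0;
  }
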
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180206181813.10943-18-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 2da4d0456a03..e8514f651865 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -111,17 +111,20 @@ struct sort_entry sort_thread = { /* --sort comm */ +/* + * We can't use pointer comparison in functions below, + * because it gives different results based on pointer + * values, which could break some sorting assumptions. + */ static int64_t sort__comm_cmp(struct hist_entry *left, struct hist_entry *right) { - /* Compare the addr that should be unique among comm */ return strcmp(comm__str(right->comm), comm__str(left->comm)); } static int64_t sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) { - /* Compare the addr that should be unique among comm */ return strcmp(comm__str(right->comm), comm__str(left->comm)); } -- cgit v1.2.3 From 8614ada0be7d7be84b85c006d526a9c8f76484fa Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 6 Feb 2018 19:17:57 +0100 Subject: perf report: Ask for ordered events for --tasks option If we have the time in, keep the events in time order. Committer notes: Trying to be more verbose, what actual effect this will have in this particular case? Before and after this patch shows the artifacts: --- /tmp/before 2018-02-06 15:40:29.536411625 -0300 +++ /tmp/after 2018-02-06 15:40:51.963403599 -0300 @@ -5,34 +5,34 @@ 2540 2540 1818 | gnome-terminal- 3489 3489 2540 | bash 32433 32433 3489 | perf - 32434 32434 32433 | perf + 32434 32434 32433 | make 32441 32441 32434 | make 32514 32514 32441 | make 511 511 32514 | sh - 512 512 511 | sh + 512 512 511 | install We don't have 'perf' calling 'perf' calling 'make', etc, the second 'perf' actually is 'make', i.e. there was reordering of the relevant PERF_RECORD_COMM and PERF_RECORD_FORK records. Ditto for sh/install later on. Look for FORK and COMM meta events, for those tids: # perf report -D | egrep 'PERF_RECORD_(FORK|COMM)' | egrep '3243[34]' 0 14774650990679 0x1a3cd8 [0x38]: PERF_RECORD_FORK(32433:32433):(3489:3489) 1 14774652080381 0x1d6568 [0x30]: PERF_RECORD_COMM exec: perf:32433/32433 1 14774742473340 0x1dbb48 [0x38]: PERF_RECORD_FORK(32434:32434):(32433:32433) 0 14774752005779 0x1a4af8 [0x30]: PERF_RECORD_COMM exec: make:32434/32434 0 14774753997960 0x1a5578 [0x38]: PERF_RECORD_FORK(32435:32435):(32434:32434) 0 14774756070782 0x1a5618 [0x38]: PERF_RECORD_FORK(32438:32438):(32434:32434) 0 14774757772939 0x1a5680 [0x38]: PERF_RECORD_FORK(32440:32440):(32434:32434) 0 14774758230600 0x1a56e8 [0x38]: PERF_RECORD_FORK(32441:32441):(32434:32434) # First column is the cpu, second is the timestamp. So they are on different CPUs, thus ring buffers, and when we don't use the ordered_events class, we end up mixing that up, use it to take advantage of the PERF_RECORD_FINISHED_ROUND meta events to go on ordering the events using the PERF_SAMPLE_TIME present in the PERF_RECORD_{FORK,COMM,EXIT,SAMPLE,etc} records in the ring buffer. 
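In code terms the fix is a single flag: with tool.ordered_events set, the session layer queues the records and delivers them to the tool callbacks sorted by their PERF_SAMPLE_TIME, flushing at each PERF_RECORD_FINISHED_ROUND, rather than in raw ring-buffer order. A sketch of the pattern, mirroring the tasks_setup() hunk below (the setup function here is illustrative only, and the perf-internal headers util/tool.h and util/event.h are assumed):

  #include <string.h>
  #include "util/tool.h"      /* struct perf_tool */
  #include "util/event.h"     /* perf_event__process_mmap{,2}() */

  static void example_tool_setup(struct perf_tool *tool)
  {
          memset(tool, 0, sizeof(*tool));
          /* Ask perf_session for time-ordered delivery of the records. */
          tool->ordered_events = true;
          tool->mmap  = perf_event__process_mmap;
          tool->mmap2 = perf_event__process_mmap2;
  }
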
Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180206181813.10943-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 4ad5dc649716..8ef71669e7a0 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -614,6 +614,7 @@ static int stats_print(struct report *rep) static void tasks_setup(struct report *rep) { memset(&rep->tool, 0, sizeof(rep->tool)); + rep->tool.ordered_events = true; if (rep->mmaps_mode) { rep->tool.mmap = perf_event__process_mmap; rep->tool.mmap2 = perf_event__process_mmap2; -- cgit v1.2.3 From ad52b8cb4886f572b147b02f4c59a648bbf05f9c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 9 Feb 2018 10:27:34 +0100 Subject: perf report: Add support to display group output for non group events Add support to display group output for if non grouped events are detected and user forces --group option. Now for non-group events recorded like: $ perf record -e 'cycles,instructions' ls you can still get group output by using --group option in report: $ perf report --group --stdio ... # Overhead Command Shared Object Symbol # ................ ....... ................ ...................... # 17.67% 0.00% ls libc-2.25.so [.] _IO_do_write@@GLIB 15.59% 25.94% ls ls [.] calculate_columns 15.41% 31.35% ls libc-2.25.so [.] __strcoll_l ... Committer note: We should improve on this by making sure that the first line states that this is not a group, but since the user doesn't have to force group view when really using grouped events (e.g. '{cycles,instructions}'), the user better know what is being done... Requested-by: Stephane Eranian Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: Stephane Eranian Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180209092734.GB20449@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 3 ++- tools/perf/builtin-report.c | 6 +++++- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 907e505b6309..a76b871f78a6 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -354,7 +354,8 @@ OPTIONS Path to objdump binary. --group:: - Show event group information together. + Show event group information together. It forces group output also + if there are no groups defined in data file. --demangle:: Demangle symbol names to human readable form. It's enabled by default, diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8ef71669e7a0..1eedb1815c4c 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -938,6 +938,7 @@ int cmd_report(int argc, const char **argv) "perf report []", NULL }; + bool group_set = false; struct report report = { .tool = { .sample = process_sample_event, @@ -1057,7 +1058,7 @@ int cmd_report(int argc, const char **argv) "Specify disassembler style (e.g. 
-M intel for intel syntax)"), OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, "Show a column with the sum of periods"), - OPT_BOOLEAN(0, "group", &symbol_conf.event_group, + OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group, &group_set, "Show event group information together"), OPT_CALLBACK_NOOPT('b', "branch-stack", &branch_mode, "", "use branch records for per branch histogram filling", @@ -1174,6 +1175,9 @@ repeat: has_br_stack = perf_header__has_feat(&session->header, HEADER_BRANCH_STACK); + if (group_set && !session->evlist->nr_groups) + perf_evlist__set_leader(session->evlist); + if (itrace_synth_opts.last_branch) has_br_stack = true; -- cgit v1.2.3 From db06a269ecbb1d71d534fc5713624eeeee0b8f92 Mon Sep 17 00:00:00 2001 From: yuzhoujian Date: Mon, 29 Jan 2018 10:25:22 +0100 Subject: perf stat: Add support to print counts for fixed times Introduce a new option to print counts for fixed number of times and update 'perf stat' documentation accordingly. Show below is the output of the new option for perf stat. $ perf stat -I 1000 --interval-count 2 -e cycles -a # time counts unit events 1.002827089 93,884,870 cycles 2.004231506 56,573,446 cycles We can just print the counts for several times with this newly introduced option. The usage of it is a little like 'vmstat', and it should be used together with "-I" option. $ vmstat -n 1 2 procs ---------memory-------------- --swap- ----io-- -system-- ------cpu--- r b swpd free buff cache si so bi bo in cs us sy id wa st 0 0 0 78270544 547484 51732076 0 0 0 20 1 1 1 0 99 0 0 0 0 0 78270512 547484 51732080 0 0 0 16 477 1555 0 0 100 0 0 Changes since v3: - merge interval_count check and times check to one line. - fix the wrong indent in stat.h - use stat_config.times instead of 'times' in cmd_stat function. Changes since v2: - none. Changes since v1: - change the name of the new option "times-print" to "interval-count". - keep the new option interval specifically. Signed-off-by: yuzhoujian Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: David Ahern Cc: Kan Liang Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1517217923-8302-2-git-send-email-ufo19890607@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 5 +++++ tools/perf/builtin-stat.c | 20 +++++++++++++++++++- tools/perf/util/stat.h | 1 + 3 files changed, 25 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 823fce7674bb..47a21645f60c 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -146,6 +146,11 @@ Print count deltas every N milliseconds (minimum: 10ms) The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals. Use with caution. example: 'perf stat -I 1000 -e cycles -a sleep 5' +--interval-count times:: +Print count deltas for fixed number of times. +This option should be used together with "-I" option. + example: 'perf stat -I 1000 --interval-count 2 -e cycles -a' + --metric-only:: Only print computed metrics. Print them in a single line. Don't show any raw values. Not supported with --per-thread. 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 98bf9d32f222..7d1d7613bf56 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -168,6 +168,7 @@ static struct timespec ref_time; static struct cpu_map *aggr_map; static aggr_get_id_t aggr_get_id; static bool append_file; +static bool interval_count; static const char *output_name; static int output_fd; static int print_free_counters_hint; @@ -571,6 +572,7 @@ static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel) static int __run_perf_stat(int argc, const char **argv) { int interval = stat_config.interval; + int times = stat_config.times; char msg[BUFSIZ]; unsigned long long t0, t1; struct perf_evsel *counter; @@ -700,6 +702,8 @@ try_again: while (!waitpid(child_pid, &status, WNOHANG)) { nanosleep(&ts, NULL); process_interval(); + if (interval_count && !(--times)) + break; } } waitpid(child_pid, &status, 0); @@ -716,8 +720,11 @@ try_again: enable_counters(); while (!done) { nanosleep(&ts, NULL); - if (interval) + if (interval) { process_interval(); + if (interval_count && !(--times)) + break; + } } } @@ -1891,6 +1898,8 @@ static const struct option stat_options[] = { "command to run after to the measured command"), OPT_UINTEGER('I', "interval-print", &stat_config.interval, "print counts at regular interval in ms (>= 10)"), + OPT_INTEGER(0, "interval-count", &stat_config.times, + "print counts for fixed number of times"), OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, "aggregate counts per processor socket", AGGR_SOCKET), OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, @@ -2870,6 +2879,15 @@ int cmd_stat(int argc, const char **argv) "The overhead percentage could be high in some cases. " "Please proceed with caution.\n"); } + if (stat_config.times && interval) + interval_count = true; + else if (stat_config.times && !interval) { + pr_err("interval-count option should be used together with " + "interval-print.\n"); + parse_options_usage(stat_usage, stat_options, "interval-count", 0); + parse_options_usage(stat_usage, stat_options, "I", 1); + goto out; + } if (perf_evlist__alloc_stats(evsel_list, interval)) goto out; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index dbc6f7134f61..540fbb350e53 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -90,6 +90,7 @@ struct perf_stat_config { bool scale; FILE *output; unsigned int interval; + int times; struct runtime_stat *stats; int stats_num; }; -- cgit v1.2.3 From f1f8ad52f8bf1239282737a2a5c3bd450300cc78 Mon Sep 17 00:00:00 2001 From: yuzhoujian Date: Mon, 29 Jan 2018 10:25:23 +0100 Subject: perf stat: Add support to print counts after a period of time Introduce a new option to print counts after N milliseconds and update 'perf stat' documentation accordingly. Show below is the output of the new option for perf stat. $ perf stat --time 2000 -e cycles -a Performance counter stats for 'system wide': 157,260,423 cycles 2.003060766 seconds time elapsed We can print the count deltas after N milliseconds with this new introduced option. This option is not supported with "-I" option. In addition, according to Kangliang's patch(19afd10410957), the monitoring overhead for system-wide core event could be very high if the interval-print parameter was below 100ms, and the limitation value is 10ms. So the same warning will be displayed when the time is set between 10ms to 100ms, and the minimal time is limited to 10ms. Users can make a decision according to their spcific cases. 
Committer notes: This actually stops the workload after the specified time, then prints the counts. So I renamed the option to --timeout and updated the documentation to state that it will not just print the counts after the specified time, but will really stop the 'perf stat' session and print the counts. The rename from 'time' to 'timeout' also fixes the build in systems where 'time' is used by glibc and can't be used as a name of a variable, such as centos:5 and centos:6. Changes since v3: - none. Changes since v2: - modify the time check in __run_perf_stat func to keep some consistency with the workload case. - add the warning when the time is set between 10ms to 100ms. - add the pr_err when the time is set below 10ms. Changes since v1: - none. Signed-off-by: yuzhoujian Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: David Ahern Cc: Kan Liang Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1517217923-8302-3-git-send-email-ufo19890607@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 5 +++++ tools/perf/builtin-stat.c | 33 +++++++++++++++++++++++++++++++-- tools/perf/util/stat.h | 1 + 3 files changed, 37 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 47a21645f60c..2bbe79a50d3c 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -151,6 +151,11 @@ Print count deltas for fixed number of times. This option should be used together with "-I" option. example: 'perf stat -I 1000 --interval-count 2 -e cycles -a' +--timeout msecs:: +Stop the 'perf stat' session and print count deltas after N milliseconds (minimum: 10 ms). +This option is not supported with the "-I" option. + example: 'perf stat --time 2000 -e cycles -a' + --metric-only:: Only print computed metrics. Print them in a single line. Don't show any raw values. Not supported with --per-thread. 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 7d1d7613bf56..2d49eccf98f2 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -573,6 +573,7 @@ static int __run_perf_stat(int argc, const char **argv) { int interval = stat_config.interval; int times = stat_config.times; + int timeout = stat_config.timeout; char msg[BUFSIZ]; unsigned long long t0, t1; struct perf_evsel *counter; @@ -586,6 +587,9 @@ static int __run_perf_stat(int argc, const char **argv) if (interval) { ts.tv_sec = interval / USEC_PER_MSEC; ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC; + } else if (timeout) { + ts.tv_sec = timeout / USEC_PER_MSEC; + ts.tv_nsec = (timeout % USEC_PER_MSEC) * NSEC_PER_MSEC; } else { ts.tv_sec = 1; ts.tv_nsec = 0; @@ -698,9 +702,11 @@ try_again: perf_evlist__start_workload(evsel_list); enable_counters(); - if (interval) { + if (interval || timeout) { while (!waitpid(child_pid, &status, WNOHANG)) { nanosleep(&ts, NULL); + if (timeout) + break; process_interval(); if (interval_count && !(--times)) break; @@ -720,6 +726,8 @@ try_again: enable_counters(); while (!done) { nanosleep(&ts, NULL); + if (timeout) + break; if (interval) { process_interval(); if (interval_count && !(--times)) @@ -1900,6 +1908,8 @@ static const struct option stat_options[] = { "print counts at regular interval in ms (>= 10)"), OPT_INTEGER(0, "interval-count", &stat_config.times, "print counts for fixed number of times"), + OPT_UINTEGER(0, "timeout", &stat_config.timeout, + "stop workload and print counts after a timeout period in ms (>= 10ms)"), OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, "aggregate counts per processor socket", AGGR_SOCKET), OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, @@ -2697,7 +2707,7 @@ int cmd_stat(int argc, const char **argv) int status = -EINVAL, run_idx; const char *mode; FILE *output = stderr; - unsigned int interval; + unsigned int interval, timeout; const char * const stat_subcommands[] = { "record", "report" }; setlocale(LC_ALL, ""); @@ -2728,6 +2738,7 @@ int cmd_stat(int argc, const char **argv) return __cmd_report(argc, argv); interval = stat_config.interval; + timeout = stat_config.timeout; /* * For record command the -o is already taken care of. @@ -2879,6 +2890,7 @@ int cmd_stat(int argc, const char **argv) "The overhead percentage could be high in some cases. " "Please proceed with caution.\n"); } + if (stat_config.times && interval) interval_count = true; else if (stat_config.times && !interval) { @@ -2889,6 +2901,23 @@ int cmd_stat(int argc, const char **argv) goto out; } + if (timeout && timeout < 100) { + if (timeout < 10) { + pr_err("timeout must be >= 10ms.\n"); + parse_options_usage(stat_usage, stat_options, "timeout", 0); + goto out; + } else + pr_warning("timeout < 100ms. " + "The overhead percentage could be high in some cases. 
" + "Please proceed with caution.\n"); + } + if (timeout && interval) { + pr_err("timeout option is not supported with interval-print.\n"); + parse_options_usage(stat_usage, stat_options, "timeout", 0); + parse_options_usage(stat_usage, stat_options, "I", 1); + goto out; + } + if (perf_evlist__alloc_stats(evsel_list, interval)) goto out; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 540fbb350e53..2f44e386a0e8 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -90,6 +90,7 @@ struct perf_stat_config { bool scale; FILE *output; unsigned int interval; + unsigned int timeout; int times; struct runtime_stat *stats; int stats_num; -- cgit v1.2.3 From c53b4bb02b45ceec7a590e47820afbb5cef0bb81 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 15 Feb 2018 13:26:27 +0100 Subject: tools lib symbol: Skip non-address kallsyms line Adding check on failed attempt to parse the address and skip the line parsing early in that case. The address can be replaced with '(null)' string in case user don't have enough permissions, like: $ cat /proc/kallsyms (null) A irq_stack_union (null) A __per_cpu_start ... Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180215122635.24029-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/symbol/kallsyms.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/lib/symbol/kallsyms.c b/tools/lib/symbol/kallsyms.c index 914cb8e3d40b..689b6a130dd7 100644 --- a/tools/lib/symbol/kallsyms.c +++ b/tools/lib/symbol/kallsyms.c @@ -38,6 +38,10 @@ int kallsyms__parse(const char *filename, void *arg, len = hex2u64(line, &start); + /* Skip the line if we failed to parse the address. */ + if (!len) + continue; + len++; if (len + 2 >= line_len) continue; -- cgit v1.2.3 From c39629614640a7a5331bf156b0d26effade0a67f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 15 Feb 2018 13:26:28 +0100 Subject: perf symbols: Check if we read regular file in dso__load() The current code in dso__load() calls is_regular_file(), but it checks its return value only after calling symsrc__init(). That can make symsrc__init() block in elf_* functions on reading the file if the file happens to be device and not regular one. Call symsrc__init() only for regular files. Also remove the symsrc__destroy() cleanup, which is not needed now, because we call symsrc__init() only for regular files. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180215122635.24029-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index cc065d4bfafc..e366e3060e6b 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1582,7 +1582,7 @@ int dso__load(struct dso *dso, struct map *map) bool next_slot = false; bool is_reg; bool nsexit; - int sirc; + int sirc = -1; enum dso_binary_type symtab_type = binary_type_symtab[i]; @@ -1600,16 +1600,14 @@ int dso__load(struct dso *dso, struct map *map) nsinfo__mountns_exit(&nsc); is_reg = is_regular_file(name); - sirc = symsrc__init(ss, dso, name, symtab_type); + if (is_reg) + sirc = symsrc__init(ss, dso, name, symtab_type); if (nsexit) nsinfo__mountns_enter(dso->nsinfo, &nsc); - if (!is_reg || sirc < 0) { - if (sirc >= 0) - symsrc__destroy(ss); + if (!is_reg || sirc < 0) continue; - } if (!syms_ss && symsrc__has_symtab(ss)) { syms_ss = ss; -- cgit v1.2.3 From 81f981d7ec43ed93901c12b6521d39b06f1ed3d3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 15 Feb 2018 13:26:29 +0100 Subject: perf machine: Free root_dir in machine__init() error path Free root_dir in machine__init() error path. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180215122635.24029-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index b05a67464c03..c976384f9022 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -50,6 +50,8 @@ static void machine__threads_init(struct machine *machine) int machine__init(struct machine *machine, const char *root_dir, pid_t pid) { + int err = -ENOMEM; + memset(machine, 0, sizeof(*machine)); map_groups__init(&machine->kmaps, machine); RB_CLEAR_NODE(&machine->rb_node); @@ -79,7 +81,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) char comm[64]; if (thread == NULL) - return -ENOMEM; + goto out; snprintf(comm, sizeof(comm), "[guest/%d]", pid); thread__set_comm(thread, comm, 0); @@ -87,7 +89,11 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) } machine->current_tid = NULL; + err = 0; +out: + if (err) + zfree(&machine->root_dir); return 0; } -- cgit v1.2.3 From 8c7f1bb37b29f140e08175132f3abb4d5ad229fc Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 15 Feb 2018 13:26:30 +0100 Subject: perf machine: Move kernel mmap name into struct machine It simplifies and centralizes the code. The kernel mmap name is set for machine type, which we know from the beginning, so there's no reason to generate it every time we need it. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180215122635.24029-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 10 +++---- tools/perf/util/event.c | 5 +--- tools/perf/util/machine.c | 67 +++++++++++++++++++++++----------------------- tools/perf/util/machine.h | 3 +-- tools/perf/util/symbol.c | 3 +-- 5 files changed, 39 insertions(+), 49 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 7f8553630c4d..537eadd81914 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -316,7 +316,6 @@ static int machine__write_buildid_table(struct machine *machine, struct feat_fd *fd) { int err = 0; - char nm[PATH_MAX]; struct dso *pos; u16 kmisc = PERF_RECORD_MISC_KERNEL, umisc = PERF_RECORD_MISC_USER; @@ -338,9 +337,8 @@ static int machine__write_buildid_table(struct machine *machine, name = pos->short_name; name_len = pos->short_name_len; } else if (dso__is_kcore(pos)) { - machine__mmap_name(machine, nm, sizeof(nm)); - name = nm; - name_len = strlen(nm); + name = machine->mmap_name; + name_len = strlen(name); } else { name = pos->long_name; name_len = pos->long_name_len; @@ -813,12 +811,10 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine) bool is_kallsyms = dso__is_kallsyms(dso); bool is_vdso = dso__is_vdso(dso); const char *name = dso->long_name; - char nm[PATH_MAX]; if (dso__is_kcore(dso)) { is_kallsyms = true; - machine__mmap_name(machine, nm, sizeof(nm)); - name = nm; + name = machine->mmap_name; } return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), name, dso->nsinfo, is_kallsyms, is_vdso); diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 44e603c27944..4644e751a3e3 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -894,8 +894,6 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, struct machine *machine) { size_t size; - const char *mmap_name; - char name_buff[PATH_MAX]; struct map *map = machine__kernel_map(machine); struct kmap *kmap; int err; @@ -918,7 +916,6 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, return -1; } - mmap_name = machine__mmap_name(machine, name_buff, sizeof(name_buff)); if (machine__is_host(machine)) { /* * kernel uses PERF_RECORD_MISC_USER for user space maps, @@ -931,7 +928,7 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, kmap = map__kmap(map); size = snprintf(event->mmap.filename, sizeof(event->mmap.filename), - "%s%s", mmap_name, kmap->ref_reloc_sym->name) + 1; + "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1; size = PERF_ALIGN(size, sizeof(u64)); event->mmap.header.type = PERF_RECORD_MMAP; event->mmap.header.size = (sizeof(event->mmap) - diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index c976384f9022..b1f1961b13f4 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -48,6 +48,27 @@ static void machine__threads_init(struct machine *machine) } } +static int machine__set_mmap_name(struct machine *machine) +{ + if (machine__is_host(machine)) { + if (symbol_conf.vmlinux_name) + machine->mmap_name = strdup(symbol_conf.vmlinux_name); + else + machine->mmap_name = strdup("[kernel.kallsyms]"); + } else if (machine__is_default_guest(machine)) { + if (symbol_conf.default_guest_vmlinux_name) + machine->mmap_name = strdup(symbol_conf.default_guest_vmlinux_name); + else + machine->mmap_name = 
strdup("[guest.kernel.kallsyms]"); + } else { + if (asprintf(&machine->mmap_name, "[guest.kernel.kallsyms.%d]", + machine->pid) < 0) + machine->mmap_name = NULL; + } + + return machine->mmap_name ? 0 : -ENOMEM; +} + int machine__init(struct machine *machine, const char *root_dir, pid_t pid) { int err = -ENOMEM; @@ -75,6 +96,9 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) if (machine->root_dir == NULL) return -ENOMEM; + if (machine__set_mmap_name(machine)) + goto out; + if (pid != HOST_KERNEL_ID) { struct thread *thread = machine__findnew_thread(machine, -1, pid); @@ -92,8 +116,10 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) err = 0; out: - if (err) + if (err) { zfree(&machine->root_dir); + zfree(&machine->mmap_name); + } return 0; } @@ -186,6 +212,7 @@ void machine__exit(struct machine *machine) dsos__exit(&machine->dsos); machine__exit_vdso(machine); zfree(&machine->root_dir); + zfree(&machine->mmap_name); zfree(&machine->current_tid); for (i = 0; i < THREADS__TABLE_SIZE; i++) { @@ -328,20 +355,6 @@ void machines__process_guests(struct machines *machines, } } -char *machine__mmap_name(struct machine *machine, char *bf, size_t size) -{ - if (machine__is_host(machine)) - snprintf(bf, size, "[%s]", "kernel.kallsyms"); - else if (machine__is_default_guest(machine)) - snprintf(bf, size, "[%s]", "guest.kernel.kallsyms"); - else { - snprintf(bf, size, "[%s.%d]", "guest.kernel.kallsyms", - machine->pid); - } - - return bf; -} - void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size) { struct rb_node *node; @@ -777,25 +790,13 @@ size_t machine__fprintf(struct machine *machine, FILE *fp) static struct dso *machine__get_kernel(struct machine *machine) { - const char *vmlinux_name = NULL; + const char *vmlinux_name = machine->mmap_name; struct dso *kernel; if (machine__is_host(machine)) { - vmlinux_name = symbol_conf.vmlinux_name; - if (!vmlinux_name) - vmlinux_name = DSO__NAME_KALLSYMS; - kernel = machine__findnew_kernel(machine, vmlinux_name, "[kernel]", DSO_TYPE_KERNEL); } else { - char bf[PATH_MAX]; - - if (machine__is_default_guest(machine)) - vmlinux_name = symbol_conf.default_guest_vmlinux_name; - if (!vmlinux_name) - vmlinux_name = machine__mmap_name(machine, bf, - sizeof(bf)); - kernel = machine__findnew_kernel(machine, vmlinux_name, "[guest.kernel]", DSO_TYPE_GUEST_KERNEL); @@ -1295,7 +1296,6 @@ static int machine__process_kernel_mmap_event(struct machine *machine, union perf_event *event) { struct map *map; - char kmmap_prefix[PATH_MAX]; enum dso_kernel_type kernel_type; bool is_kernel_mmap; @@ -1303,15 +1303,14 @@ static int machine__process_kernel_mmap_event(struct machine *machine, if (machine__uses_kcore(machine)) return 0; - machine__mmap_name(machine, kmmap_prefix, sizeof(kmmap_prefix)); if (machine__is_host(machine)) kernel_type = DSO_TYPE_KERNEL; else kernel_type = DSO_TYPE_GUEST_KERNEL; is_kernel_mmap = memcmp(event->mmap.filename, - kmmap_prefix, - strlen(kmmap_prefix) - 1) == 0; + machine->mmap_name, + strlen(machine->mmap_name) - 1) == 0; if (event->mmap.filename[0] == '/' || (!is_kernel_mmap && event->mmap.filename[0] == '[')) { map = machine__findnew_module_map(machine, event->mmap.start, @@ -1322,7 +1321,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, map->end = map->start + event->mmap.len; } else if (is_kernel_mmap) { const char *symbol_name = (event->mmap.filename + - strlen(kmmap_prefix)); + strlen(machine->mmap_name)); /* * Should be there already, from the 
build-id table in * the header. @@ -1363,7 +1362,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, up_read(&machine->dsos.lock); if (kernel == NULL) - kernel = machine__findnew_dso(machine, kmmap_prefix); + kernel = machine__findnew_dso(machine, machine->mmap_name); if (kernel == NULL) goto out_problem; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 5ce860b64c74..cb0a20f3a96b 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -43,6 +43,7 @@ struct machine { bool comm_exec; bool kptr_restrict_warned; char *root_dir; + char *mmap_name; struct threads threads[THREADS__TABLE_SIZE]; struct vdso_info *vdso_info; struct perf_env *env; @@ -142,8 +143,6 @@ struct machine *machines__find(struct machines *machines, pid_t pid); struct machine *machines__findnew(struct machines *machines, pid_t pid); void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size); -char *machine__mmap_name(struct machine *machine, char *bf, size_t size); - void machines__set_comm_exec(struct machines *machines, bool comm_exec); struct machine *machine__new_host(void); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index e366e3060e6b..a1a312d99f30 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1958,8 +1958,7 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map) pr_debug("Using %s for symbols\n", kallsyms_filename); if (err > 0 && !dso__is_kcore(dso)) { dso->binary_type = DSO_BINARY_TYPE__GUEST_KALLSYMS; - machine__mmap_name(machine, path, sizeof(path)); - dso__set_long_name(dso, strdup(path), true); + dso__set_long_name(dso, machine->mmap_name, false); map__fixup_start(map); map__fixup_end(map); } -- cgit v1.2.3 From 05db6ff73d805ecc70947c9eee2ed9948d0be52b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 15 Feb 2018 13:26:31 +0100 Subject: perf machine: Generalize machine__set_kernel_mmap() So it could be called without event object, just with start and end values. It will be used in following patch. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180215122635.24029-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index b1f1961b13f4..292e70c774bd 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1262,15 +1262,15 @@ int machine__create_kernel_maps(struct machine *machine) return 0; } -static void machine__set_kernel_mmap_len(struct machine *machine, - union perf_event *event) +static void machine__set_kernel_mmap(struct machine *machine, + u64 start, u64 end) { int i; for (i = 0; i < MAP__NR_TYPES; i++) { - machine->vmlinux_maps[i]->start = event->mmap.start; - machine->vmlinux_maps[i]->end = (event->mmap.start + - event->mmap.len); + machine->vmlinux_maps[i]->start = start; + machine->vmlinux_maps[i]->end = end; + /* * Be a bit paranoid here, some perf.data file came with * a zero sized synthesized MMAP event for the kernel. 
@@ -1375,7 +1375,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine, if (strstr(kernel->long_name, "vmlinux")) dso__set_short_name(kernel, "[kernel.vmlinux]", false); - machine__set_kernel_mmap_len(machine, event); + machine__set_kernel_mmap(machine, event->mmap.start, + event->mmap.start + event->mmap.len); /* * Avoid using a zero address (kptr_restrict) for the ref reloc -- cgit v1.2.3 From 1fb87b8e9599932e1d8b11c3a1b03b4414aaf7ba Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 15 Feb 2018 13:26:32 +0100 Subject: perf machine: Don't search for active kernel start in __machine__create_kernel_maps We should not search for the kernel start address in __machine__create_kernel_maps(), because it's being used in the 'report' code path, where we are interested in kernel MMAP data address (the one recorded via 'perf record', possibly on another machine, or an older or newer kernel on the same machine where analysis is being performed) instead of in current kernel address. The __machine__create_kernel_maps() function serves purely for creating the machines kernel maps and setting up the kmap group. The report code path then sets the address based on the data from kernel MMAP event in the machine__set_kernel_mmap() function. The kallsyms search address logic is used for test code, that calls machine__create_kernel_maps() to get current maps and calls machine__get_running_kernel_start() to get kernel starting address. Use machine__set_kernel_mmap() to set the kernel maps start address and moving map_groups__fixup_end to be call when all maps are in place. Also make __machine__create_kernel_maps static, because there's no external user. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180215122635.24029-7-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 55 ++++++++++++++++++++++------------------------- tools/perf/util/machine.h | 1 - 2 files changed, 26 insertions(+), 30 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 292e70c774bd..2db8d7dd0f80 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -856,13 +856,10 @@ static int machine__get_running_kernel_start(struct machine *machine, return 0; } -int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) +static int +__machine__create_kernel_maps(struct machine *machine, struct dso *kernel) { int type; - u64 start = 0; - - if (machine__get_running_kernel_start(machine, NULL, &start)) - return -1; /* In case of renewal the kernel map, destroy previous one */ machine__destroy_kernel_maps(machine); @@ -871,7 +868,7 @@ int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) struct kmap *kmap; struct map *map; - machine->vmlinux_maps[type] = map__new2(start, kernel, type); + machine->vmlinux_maps[type] = map__new2(0, kernel, type); if (machine->vmlinux_maps[type] == NULL) return -1; @@ -1222,6 +1219,24 @@ static int machine__create_modules(struct machine *machine) return 0; } +static void machine__set_kernel_mmap(struct machine *machine, + u64 start, u64 end) +{ + int i; + + for (i = 0; i < MAP__NR_TYPES; i++) { + machine->vmlinux_maps[i]->start = start; + machine->vmlinux_maps[i]->end = end; + + /* + * Be a bit paranoid here, some perf.data file came with + * a zero sized synthesized MMAP event for the kernel. 
+ */ + if (machine->vmlinux_maps[i]->end == 0) + machine->vmlinux_maps[i]->end = ~0ULL; + } +} + int machine__create_kernel_maps(struct machine *machine) { struct dso *kernel = machine__get_kernel(machine); @@ -1246,40 +1261,22 @@ int machine__create_kernel_maps(struct machine *machine) "continuing anyway...\n", machine->pid); } - /* - * Now that we have all the maps created, just set the ->end of them: - */ - map_groups__fixup_end(&machine->kmaps); - if (!machine__get_running_kernel_start(machine, &name, &addr)) { if (name && maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) { machine__destroy_kernel_maps(machine); return -1; } + machine__set_kernel_mmap(machine, addr, 0); } + /* + * Now that we have all the maps created, just set the ->end of them: + */ + map_groups__fixup_end(&machine->kmaps); return 0; } -static void machine__set_kernel_mmap(struct machine *machine, - u64 start, u64 end) -{ - int i; - - for (i = 0; i < MAP__NR_TYPES; i++) { - machine->vmlinux_maps[i]->start = start; - machine->vmlinux_maps[i]->end = end; - - /* - * Be a bit paranoid here, some perf.data file came with - * a zero sized synthesized MMAP event for the kernel. - */ - if (machine->vmlinux_maps[i]->end == 0) - machine->vmlinux_maps[i]->end = ~0ULL; - } -} - static bool machine__uses_kcore(struct machine *machine) { struct dso *dso; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index cb0a20f3a96b..50d587d34459 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -238,7 +238,6 @@ size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp, bool (skip)(struct dso *dso, int parm), int parm); void machine__destroy_kernel_maps(struct machine *machine); -int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel); int machine__create_kernel_maps(struct machine *machine); int machines__create_kernel_maps(struct machines *machines, pid_t pid); -- cgit v1.2.3 From e8f3879f762ffe75a24fd354dd87f073214428fa Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 15 Feb 2018 13:26:33 +0100 Subject: perf machine: Remove machine__load_kallsyms() The current machine__load_kallsyms() function has no caller, so replace it directly with __machine__load_kallsyms(). Also remove the no_kcore argument as it was always called with a 'true' value. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180215122635.24029-8-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/vmlinux-kallsyms.c | 2 +- tools/perf/util/machine.c | 14 ++++---------- tools/perf/util/machine.h | 2 -- 3 files changed, 5 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index f6789fb029d6..58349297f9fb 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -56,7 +56,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest * be compacted against the list of modules found in the "vmlinux" * code and with the one got from /proc/modules from the "kallsyms" code. 
*/ - if (__machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, true) <= 0) { + if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type) <= 0) { pr_debug("dso__load_kallsyms "); goto out; } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 2db8d7dd0f80..fe27ef55cbb9 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -151,7 +151,7 @@ struct machine *machine__new_kallsyms(void) * ask for not using the kcore parsing code, once this one is fixed * to create a map per module. */ - if (machine && __machine__load_kallsyms(machine, "/proc/kallsyms", MAP__FUNCTION, true) <= 0) { + if (machine && machine__load_kallsyms(machine, "/proc/kallsyms", MAP__FUNCTION) <= 0) { machine__delete(machine); machine = NULL; } @@ -991,11 +991,11 @@ int machines__create_kernel_maps(struct machines *machines, pid_t pid) return machine__create_kernel_maps(machine); } -int __machine__load_kallsyms(struct machine *machine, const char *filename, - enum map_type type, bool no_kcore) +int machine__load_kallsyms(struct machine *machine, const char *filename, + enum map_type type) { struct map *map = machine__kernel_map(machine); - int ret = __dso__load_kallsyms(map->dso, filename, map, no_kcore); + int ret = __dso__load_kallsyms(map->dso, filename, map, true); if (ret > 0) { dso__set_loaded(map->dso, type); @@ -1010,12 +1010,6 @@ int __machine__load_kallsyms(struct machine *machine, const char *filename, return ret; } -int machine__load_kallsyms(struct machine *machine, const char *filename, - enum map_type type) -{ - return __machine__load_kallsyms(machine, filename, type, false); -} - int machine__load_vmlinux_path(struct machine *machine, enum map_type type) { struct map *map = machine__kernel_map(machine); diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 50d587d34459..66cc200ef86f 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -225,8 +225,6 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start, const char *filename); int arch__fix_module_text_start(u64 *start, const char *name); -int __machine__load_kallsyms(struct machine *machine, const char *filename, - enum map_type type, bool no_kcore); int machine__load_kallsyms(struct machine *machine, const char *filename, enum map_type type); int machine__load_vmlinux_path(struct machine *machine, enum map_type type); -- cgit v1.2.3 From a73e24d240bc136619d382b1268f34d75c9d25ce Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 15 Feb 2018 13:26:34 +0100 Subject: perf tools: Do not create kernel maps in sample__resolve() There's no need for kernel maps to be allocated at this point - sample processing. We search for kernel maps using the kernel map_groups in machine::kmaps which is static. If vmlinux maps for any reason still don't exist, the search correctly fails because they are not in the map group. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180215122635.24029-9-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 4644e751a3e3..f0a6cbd033cc 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1588,17 +1588,6 @@ int machine__resolve(struct machine *machine, struct addr_location *al, return -1; dump_printf(" ... 
thread: %s:%d\n", thread__comm_str(thread), thread->tid); - /* - * Have we already created the kernel maps for this machine? - * - * This should have happened earlier, when we processed the kernel MMAP - * events, but for older perf.data files there was no such thing, so do - * it now. - */ - if (sample->cpumode == PERF_RECORD_MISC_KERNEL && - machine__kernel_map(machine) == NULL) - machine__create_kernel_maps(machine); - thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, al); dump_printf(" ...... dso: %s\n", al->map ? al->map->dso->long_name : -- cgit v1.2.3 From ab6e9a99345131cd8e54268d1d0dc04a33f7ed11 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 15 Feb 2018 13:26:35 +0100 Subject: perf tests: Use arch__compare_symbol_names to compare symbols The symbol search called by machine__find_kernel_symbol_by_name is using internally arch__compare_symbol_names function to compare 2 symbol names, because different archs have different ways of comparing symbols. Mostly for skipping '.' prefixes and similar. In test 1 when we try to find matching symbols in kallsyms and vmlinux, by address and by symbol name. When either is found we compare the pair symbol names by simple strcmp, which is not good enough for reasons explained in previous paragraph. On powerpc this can cause lockup, because even thought we found the pair, the compared names are different and don't match simple strcmp. Following code path is executed, that leads to lockup: - we find the pair in kallsyms by sym->start next_pair: - we compare the names and it fails - we find the pair by sym->name - the pair addresses match so we call goto next_pair because we assume the names match in this case Signed-off-by: Jiri Olsa Tested-by: Naveen N. Rao Acked-by: Naveen N. Rao Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: 031b84c407c3 ("perf probe ppc: Enable matching against dot symbols automatically") Link: http://lkml.kernel.org/r/20180215122635.24029-10-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/vmlinux-kallsyms.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index 58349297f9fb..1e5adb65632a 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -125,7 +125,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest if (pair && UM(pair->start) == mem_start) { next_pair: - if (strcmp(sym->name, pair->name) == 0) { + if (arch__compare_symbol_names(sym->name, pair->name) == 0) { /* * kallsyms don't have the symbol end, so we * set that by using the next symbol start - 1, -- cgit v1.2.3 From 099c1130998fd2234a200b55d69713ec66d88325 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Mon, 12 Feb 2018 13:32:35 -0700 Subject: perf cs-etm: Freeing allocated memory This patch frees all the memory allocated in function cs_etm__alloc_queue(). 
Signed-off-by: Mathieu Poirier Cc: Alexander Shishkin Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1518467557-18505-2-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index b9f0a53dfa65..f2c98774e665 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -174,6 +174,12 @@ static void cs_etm__free_queue(void *priv) { struct cs_etm_queue *etmq = priv; + if (!etmq) + return; + + thread__zput(etmq->thread); + cs_etm_decoder__free(etmq->decoder); + zfree(&etmq->event_buf); free(etmq); } -- cgit v1.2.3 From 147c508f3004df6e2958f6c8867909531c2a15e2 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Mon, 12 Feb 2018 13:32:36 -0700 Subject: perf tools: Use target->per_thread and target->system_wide flags Mathieu Poirier reports issue in commit ("73c0ca1eee3d perf thread_map: Enumerate all threads from /proc") that it has negative impact on 'perf record --per-thread'. It has the effect of creating a kernel event for each thread in the system for 'perf record --per-thread'. Mathieu Poirier's patch ("perf util: Do not reuse target->per_thread flag") can fix this issue by creating a new target->all_threads flag. This patch is based on Mathieu Poirier's patch but it doesn't use a new target->all_threads flag. This patch just uses 'target->per_thread && target->system_wide' as a condition to check for all threads case. Signed-off-by: Jin Yao Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: linux-arm-kernel@lists.infradead.org Fixes: 73c0ca1eee3d ("perf thread_map: Enumerate all threads from /proc") Link: http://lkml.kernel.org/r/1518467557-18505-3-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Mathieu Poirier [Fixed checkpatch warning about line over 80 characters] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 21 ++++++++++++++++++++- tools/perf/util/thread_map.c | 4 ++-- tools/perf/util/thread_map.h | 2 +- 3 files changed, 23 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e5fc14e53c05..7b7d535396f7 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1086,11 +1086,30 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages) int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) { + bool all_threads = (target->per_thread && target->system_wide); struct cpu_map *cpus; struct thread_map *threads; + /* + * If specify '-a' and '--per-thread' to perf record, perf record + * will override '--per-thread'. target->per_thread = false and + * target->system_wide = true. + * + * If specify '--per-thread' only to perf record, + * target->per_thread = true and target->system_wide = false. + * + * So target->per_thread && target->system_wide is false. + * For perf record, thread_map__new_str doesn't call + * thread_map__new_all_cpus. That will keep perf record's + * current behavior. + * + * For perf stat, it allows the case that target->per_thread and + * target->system_wide are all true. It means to collect system-wide + * per-thread data. thread_map__new_str will call + * thread_map__new_all_cpus to enumerate all threads. 
+ */ threads = thread_map__new_str(target->pid, target->tid, target->uid, - target->per_thread); + all_threads); if (!threads) return -1; diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 3e1038f6491c..729dad8f412d 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -323,7 +323,7 @@ out_free_threads: } struct thread_map *thread_map__new_str(const char *pid, const char *tid, - uid_t uid, bool per_thread) + uid_t uid, bool all_threads) { if (pid) return thread_map__new_by_pid_str(pid); @@ -331,7 +331,7 @@ struct thread_map *thread_map__new_str(const char *pid, const char *tid, if (!tid && uid != UINT_MAX) return thread_map__new_by_uid(uid); - if (per_thread) + if (all_threads) return thread_map__new_all_cpus(); return thread_map__new_by_tid_str(tid); diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index 0a806b99e73c..5ec91cfd1869 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -31,7 +31,7 @@ struct thread_map *thread_map__get(struct thread_map *map); void thread_map__put(struct thread_map *map); struct thread_map *thread_map__new_str(const char *pid, - const char *tid, uid_t uid, bool per_thread); + const char *tid, uid_t uid, bool all_threads); struct thread_map *thread_map__new_by_tid_str(const char *tid_str); -- cgit v1.2.3 From d2785de15f1bd42d613d56bbac5a007e7293b874 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Mon, 12 Feb 2018 13:32:37 -0700 Subject: perf auxtrace arm: Fixing uninitialised variable When working natively on arm64 the compiler gets pesky and complains that variable 'i' is uninitialised, something that breaks the compilation. Here no further checks are needed since variable 'found_spe' can only be true if variable 'i' has been initialised as part of the for loop. Signed-off-by: Mathieu Poirier Cc: Alexander Shishkin Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1518467557-18505-4-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/auxtrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c index 2323581b157d..fa639e3e52ac 100644 --- a/tools/perf/arch/arm/util/auxtrace.c +++ b/tools/perf/arch/arm/util/auxtrace.c @@ -68,7 +68,7 @@ struct auxtrace_record bool found_spe = false; static struct perf_pmu **arm_spe_pmus = NULL; static int nr_spes = 0; - int i; + int i = 0; if (!evlist) return NULL; -- cgit v1.2.3 From 796bfadd831bdef0de06cd0253398c3fe011e459 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Mon, 12 Feb 2018 16:38:57 -0700 Subject: perf cs-etm: Properly deal with cpu maps This patch allows the CoreSight AUX info section to fit topologies where only a subset of all available CPUs are present, avoiding at the same time accessing the ETM configuration areas of CPUs that have been offlined. 
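The sizing logic now walks every possible CPU index and only counts a CPU when it is both requested by the event and currently online; condensed from the cs_etm_info_priv_size() hunk below:

	struct cpu_map *event_cpus = evlist->cpus;
	struct cpu_map *online_cpus = cpu_map__new(NULL);	/* currently online CPUs */

	for (i = 0; i < cpu__max_cpu(); i++) {
		/* Skip CPUs that are not being traced or not online */
		if (!cpu_map__has(event_cpus, i) ||
		    !cpu_map__has(online_cpus, i))
			continue;

		if (cs_etm_is_etmv4(itr, i))
			etmv4++;	/* needs a CS_ETMV4_PRIV_SIZE slot */
		else
			etmv3++;	/* needs a CS_ETMV3_PRIV_SIZE slot */
	}

	cpu_map__put(online_cpus);

cs_etm_info_fill() applies the same rule when writing the metadata, and returns -EINVAL if a requested CPU is not online.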
Signed-off-by: Mathieu Poirier Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1518478737-24649-1-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/cs-etm.c | 51 +++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index fbfc055d3f4d..5c655ad4621e 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -298,12 +298,17 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, { int i; int etmv3 = 0, etmv4 = 0; - const struct cpu_map *cpus = evlist->cpus; + struct cpu_map *event_cpus = evlist->cpus; + struct cpu_map *online_cpus = cpu_map__new(NULL); /* cpu map is not empty, we have specific CPUs to work with */ - if (!cpu_map__empty(cpus)) { - for (i = 0; i < cpu_map__nr(cpus); i++) { - if (cs_etm_is_etmv4(itr, cpus->map[i])) + if (!cpu_map__empty(event_cpus)) { + for (i = 0; i < cpu__max_cpu(); i++) { + if (!cpu_map__has(event_cpus, i) || + !cpu_map__has(online_cpus, i)) + continue; + + if (cs_etm_is_etmv4(itr, i)) etmv4++; else etmv3++; @@ -311,6 +316,9 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, } else { /* get configuration for all CPUs in the system */ for (i = 0; i < cpu__max_cpu(); i++) { + if (!cpu_map__has(online_cpus, i)) + continue; + if (cs_etm_is_etmv4(itr, i)) etmv4++; else @@ -318,6 +326,8 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, } } + cpu_map__put(online_cpus); + return (CS_ETM_HEADER_SIZE + (etmv4 * CS_ETMV4_PRIV_SIZE) + (etmv3 * CS_ETMV3_PRIV_SIZE)); @@ -447,7 +457,9 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, int i; u32 offset; u64 nr_cpu, type; - const struct cpu_map *cpus = session->evlist->cpus; + struct cpu_map *cpu_map; + struct cpu_map *event_cpus = session->evlist->cpus; + struct cpu_map *online_cpus = cpu_map__new(NULL); struct cs_etm_recording *ptr = container_of(itr, struct cs_etm_recording, itr); struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; @@ -458,8 +470,21 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, if (!session->evlist->nr_mmaps) return -EINVAL; - /* If the cpu_map is empty all CPUs are involved */ - nr_cpu = cpu_map__empty(cpus) ? 
cpu__max_cpu() : cpu_map__nr(cpus); + /* If the cpu_map is empty all online CPUs are involved */ + if (cpu_map__empty(event_cpus)) { + cpu_map = online_cpus; + } else { + /* Make sure all specified CPUs are online */ + for (i = 0; i < cpu_map__nr(event_cpus); i++) { + if (cpu_map__has(event_cpus, i) && + !cpu_map__has(online_cpus, i)) + return -EINVAL; + } + + cpu_map = event_cpus; + } + + nr_cpu = cpu_map__nr(cpu_map); /* Get PMU type as dynamically assigned by the core */ type = cs_etm_pmu->type; @@ -472,15 +497,11 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, offset = CS_ETM_SNAPSHOT + 1; - /* cpu map is not empty, we have specific CPUs to work with */ - if (!cpu_map__empty(cpus)) { - for (i = 0; i < cpu_map__nr(cpus) && offset < priv_size; i++) - cs_etm_get_metadata(cpus->map[i], &offset, itr, info); - } else { - /* get configuration for all CPUs in the system */ - for (i = 0; i < cpu__max_cpu(); i++) + for (i = 0; i < cpu__max_cpu() && offset < priv_size; i++) + if (cpu_map__has(cpu_map, i)) cs_etm_get_metadata(i, &offset, itr, info); - } + + cpu_map__put(online_cpus); return 0; } -- cgit v1.2.3 From ac2c30683803b08bdf35c08d712eafa1ca496164 Mon Sep 17 00:00:00 2001 From: Jaecheol Shin Date: Wed, 7 Feb 2018 18:52:05 +0900 Subject: perf annotate: Add missing arguments in Man page Some options must require an argument. But input, stdio-color, cpu have no them. So I added it. Signed-off-by: Jaecheol Shin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Taeung Song Link: http://lkml.kernel.org/r/20180207095205.62715-1-jcgod413@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-annotate.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index c635eab6af54..292809c3c0ca 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt @@ -21,7 +21,7 @@ If there is no debug info in the object, then annotated assembly is displayed. OPTIONS ------- -i:: ---input=:: +--input=:: Input file name. (default: perf.data unless stdin is a fifo) -d:: @@ -69,7 +69,7 @@ OPTIONS --stdio:: Use the stdio interface. ---stdio-color:: +--stdio-color=:: 'always', 'never' or 'auto', allowing configuring color output via the command line, in addition to via "color.ui" .perfconfig. Use '--stdio-color always' to generate color even when redirecting @@ -84,7 +84,7 @@ OPTIONS --gtk:: Use the GTK interface. -C:: ---cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can +--cpu=:: Only report samples for the list of CPUs provided. Multiple CPUs can be provided as a comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. Default is to report samples on all CPUs. -- cgit v1.2.3 From 577980a00063935815a55f461601579fd5e61f59 Mon Sep 17 00:00:00 2001 From: Sangwon Hong Date: Mon, 12 Feb 2018 05:38:36 +0900 Subject: perf kmem: Document a missing option & an argument First, 'perf kmem' has a '--force' option, but didn't document it on the man page. So add it. Second, the '--time' option has to get a value, but isn't documented on the man page. Describe it. 
Signed-off-by: Sangwon Hong Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Taeung Song Link: http://lkml.kernel.org/r/1518381517-30766-1-git-send-email-qpakzk@gmail.com [ Add blank like after --force block, as requested by Namhyung ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-kmem.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt index 479fc3261a50..85b8ac695c87 100644 --- a/tools/perf/Documentation/perf-kmem.txt +++ b/tools/perf/Documentation/perf-kmem.txt @@ -25,6 +25,10 @@ OPTIONS --input=:: Select the input file (default: perf.data unless stdin is a fifo) +-f:: +--force:: + Don't do ownership validation + -v:: --verbose:: Be more verbose. (show symbol address, etc) @@ -61,7 +65,7 @@ OPTIONS default, but this option shows live (currently allocated) pages instead. (This option works with --page option only) ---time:: +--time=,:: Only analyze samples within given time window: ,. Times have the format seconds.microseconds. If start is not given (i.e., time string is ',x.y') then analysis starts at the beginning of the file. If -- cgit v1.2.3 From 7e99b1972263c2f611d7f2fb67d09f3384006593 Mon Sep 17 00:00:00 2001 From: Sangwon Hong Date: Mon, 12 Feb 2018 05:38:37 +0900 Subject: perf mem: Document a missing option Add the missing --force option on the man page. Signed-off-by: Sangwon Hong Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Taeung Song Link: http://lkml.kernel.org/r/1518381517-30766-2-git-send-email-qpakzk@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-mem.txt | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index 4be08a1e3f8d..b0211410969b 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt @@ -28,6 +28,10 @@ OPTIONS ...:: Any command you can specify in a shell. +-f:: +--force:: + Don't do ownership validation + -t:: --type=:: Select the memory operation type: load or store (default: load,store) -- cgit v1.2.3 From e573e978fb12e16094c0b39fad3dc4e6b4803c2c Mon Sep 17 00:00:00 2001 From: Robert Walker Date: Wed, 14 Feb 2018 11:24:39 +0000 Subject: perf cs-etm: Inject capabilitity for CoreSight traces Added user space perf functionality to translate CoreSight traces into instruction events with branch stack. To invoke the new functionality, use the perf inject tool with --itrace=il. For example, to translate the ETM trace from perf.data into last branch records in a new inj.data file: $ perf inject --itrace=i100000il128 -i perf.data -o perf.data.new The 'i' parameter to itrace generates periodic instruction events. The period between instruction events can be specified as a number of instructions suffixed by i (default 100000). The parameter to 'l' specifies the number of entries in the branch stack attached to instruction events. The 'b' parameter to itrace generates events on taken branches. This patch also fixes the contents of the branch events used in perf report - previously branch events were generated for each contiguous range of instructions executed. These are fixed to generate branch events between the last address of a range ending in an executed branch instruction and the start address of the next range. Based on patches by Sebastian Pop with additional fixes and support for specifying the instruction period. 
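Since only A64 is decoded for now, the new sample synthesis code converts between addresses and instruction counts with fixed 4-byte arithmetic. The helpers added in the cs-etm.c hunk below reduce to roughly:

#define A64_INSTR_SIZE 4	/* A64 instructions are always 4 bytes */

/* The RANGE packet end address is exclusive, so step back one instruction */
static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet)
{
	return packet->end_addr - A64_INSTR_SIZE;
}

/* Number of instructions covered by one executed range */
static inline u64 cs_etm__instr_count(const struct cs_etm_packet *packet)
{
	return (packet->end_addr - packet->start_addr) / A64_INSTR_SIZE;
}

T32 traces will need real instruction-level decode because of their variable instruction size, as noted in the comments added by the patch.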
Originally-by: Sebastian Pop Signed-off-by: Robert Walker Acked-by: Mathieu Poirier Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1518607481-4059-2-git-send-email-robert.walker@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 65 +++- tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 1 + tools/perf/util/cs-etm.c | 434 +++++++++++++++++++++--- 3 files changed, 436 insertions(+), 64 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 1fb01849f1c7..8ff69dfd725a 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -78,6 +78,8 @@ int cs_etm_decoder__reset(struct cs_etm_decoder *decoder) { ocsd_datapath_resp_t dp_ret; + decoder->prev_return = OCSD_RESP_CONT; + dp_ret = ocsd_dt_process_data(decoder->dcd_tree, OCSD_OP_RESET, 0, 0, NULL, NULL); if (OCSD_DATA_RESP_IS_FATAL(dp_ret)) @@ -253,16 +255,16 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder) decoder->packet_count = 0; for (i = 0; i < MAX_BUFFER; i++) { decoder->packet_buffer[i].start_addr = 0xdeadbeefdeadbeefUL; - decoder->packet_buffer[i].end_addr = 0xdeadbeefdeadbeefUL; - decoder->packet_buffer[i].exc = false; - decoder->packet_buffer[i].exc_ret = false; - decoder->packet_buffer[i].cpu = INT_MIN; + decoder->packet_buffer[i].end_addr = 0xdeadbeefdeadbeefUL; + decoder->packet_buffer[i].last_instr_taken_branch = false; + decoder->packet_buffer[i].exc = false; + decoder->packet_buffer[i].exc_ret = false; + decoder->packet_buffer[i].cpu = INT_MIN; } } static ocsd_datapath_resp_t cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, - const ocsd_generic_trace_elem *elem, const u8 trace_chan_id, enum cs_etm_sample_type sample_type) { @@ -278,18 +280,16 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, return OCSD_RESP_FATAL_SYS_ERR; et = decoder->tail; + et = (et + 1) & (MAX_BUFFER - 1); + decoder->tail = et; + decoder->packet_count++; + decoder->packet_buffer[et].sample_type = sample_type; - decoder->packet_buffer[et].start_addr = elem->st_addr; - decoder->packet_buffer[et].end_addr = elem->en_addr; decoder->packet_buffer[et].exc = false; decoder->packet_buffer[et].exc_ret = false; decoder->packet_buffer[et].cpu = *((int *)inode->priv); - - /* Wrap around if need be */ - et = (et + 1) & (MAX_BUFFER - 1); - - decoder->tail = et; - decoder->packet_count++; + decoder->packet_buffer[et].start_addr = 0xdeadbeefdeadbeefUL; + decoder->packet_buffer[et].end_addr = 0xdeadbeefdeadbeefUL; if (decoder->packet_count == MAX_BUFFER - 1) return OCSD_RESP_WAIT; @@ -297,6 +297,40 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, return OCSD_RESP_CONT; } +static ocsd_datapath_resp_t +cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder, + const ocsd_generic_trace_elem *elem, + const uint8_t trace_chan_id) +{ + int ret = 0; + struct cs_etm_packet *packet; + + ret = cs_etm_decoder__buffer_packet(decoder, trace_chan_id, + CS_ETM_RANGE); + if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT) + return ret; + + packet = &decoder->packet_buffer[decoder->tail]; + + packet->start_addr = elem->st_addr; + packet->end_addr = elem->en_addr; + switch (elem->last_i_type) { + case OCSD_INSTR_BR: + case OCSD_INSTR_BR_INDIRECT: + packet->last_instr_taken_branch = elem->last_instr_exec; + break; + case OCSD_INSTR_ISB: + case OCSD_INSTR_DSB_DMB: + 
case OCSD_INSTR_OTHER: + default: + packet->last_instr_taken_branch = false; + break; + } + + return ret; + +} + static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( const void *context, const ocsd_trc_index_t indx __maybe_unused, @@ -316,9 +350,8 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( decoder->trace_on = true; break; case OCSD_GEN_TRC_ELEM_INSTR_RANGE: - resp = cs_etm_decoder__buffer_packet(decoder, elem, - trace_chan_id, - CS_ETM_RANGE); + resp = cs_etm_decoder__buffer_range(decoder, elem, + trace_chan_id); break; case OCSD_GEN_TRC_ELEM_EXCEPTION: decoder->packet_buffer[decoder->tail].exc = true; diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index 3d2e6205d186..a4fdd285b145 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -30,6 +30,7 @@ struct cs_etm_packet { enum cs_etm_sample_type sample_type; u64 start_addr; u64 end_addr; + u8 last_instr_taken_branch; u8 exc; u8 exc_ret; int cpu; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index f2c98774e665..6e595d96c04d 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -32,6 +32,14 @@ #define MAX_TIMESTAMP (~0ULL) +/* + * A64 instructions are always 4 bytes + * + * Only A64 is supported, so can use this constant for converting between + * addresses and instruction counts, calculting offsets etc + */ +#define A64_INSTR_SIZE 4 + struct cs_etm_auxtrace { struct auxtrace auxtrace; struct auxtrace_queues queues; @@ -45,11 +53,15 @@ struct cs_etm_auxtrace { u8 snapshot_mode; u8 data_queued; u8 sample_branches; + u8 sample_instructions; int num_cpu; u32 auxtrace_type; u64 branches_sample_type; u64 branches_id; + u64 instructions_sample_type; + u64 instructions_sample_period; + u64 instructions_id; u64 **metadata; u64 kernel_start; unsigned int pmu_type; @@ -68,6 +80,12 @@ struct cs_etm_queue { u64 time; u64 timestamp; u64 offset; + u64 period_instructions; + struct branch_stack *last_branch; + struct branch_stack *last_branch_rb; + size_t last_branch_pos; + struct cs_etm_packet *prev_packet; + struct cs_etm_packet *packet; }; static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); @@ -180,6 +198,10 @@ static void cs_etm__free_queue(void *priv) thread__zput(etmq->thread); cs_etm_decoder__free(etmq->decoder); zfree(&etmq->event_buf); + zfree(&etmq->last_branch); + zfree(&etmq->last_branch_rb); + zfree(&etmq->prev_packet); + zfree(&etmq->packet); free(etmq); } @@ -276,11 +298,35 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, struct cs_etm_decoder_params d_params; struct cs_etm_trace_params *t_params; struct cs_etm_queue *etmq; + size_t szp = sizeof(struct cs_etm_packet); etmq = zalloc(sizeof(*etmq)); if (!etmq) return NULL; + etmq->packet = zalloc(szp); + if (!etmq->packet) + goto out_free; + + if (etm->synth_opts.last_branch || etm->sample_branches) { + etmq->prev_packet = zalloc(szp); + if (!etmq->prev_packet) + goto out_free; + } + + if (etm->synth_opts.last_branch) { + size_t sz = sizeof(struct branch_stack); + + sz += etm->synth_opts.last_branch_sz * + sizeof(struct branch_entry); + etmq->last_branch = zalloc(sz); + if (!etmq->last_branch) + goto out_free; + etmq->last_branch_rb = zalloc(sz); + if (!etmq->last_branch_rb) + goto out_free; + } + etmq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); if (!etmq->event_buf) goto out_free; @@ -335,6 +381,7 @@ static struct cs_etm_queue 
*cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, goto out_free_decoder; etmq->offset = 0; + etmq->period_instructions = 0; return etmq; @@ -342,6 +389,10 @@ out_free_decoder: cs_etm_decoder__free(etmq->decoder); out_free: zfree(&etmq->event_buf); + zfree(&etmq->last_branch); + zfree(&etmq->last_branch_rb); + zfree(&etmq->prev_packet); + zfree(&etmq->packet); free(etmq); return NULL; @@ -395,6 +446,129 @@ static int cs_etm__update_queues(struct cs_etm_auxtrace *etm) return 0; } +static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq) +{ + struct branch_stack *bs_src = etmq->last_branch_rb; + struct branch_stack *bs_dst = etmq->last_branch; + size_t nr = 0; + + /* + * Set the number of records before early exit: ->nr is used to + * determine how many branches to copy from ->entries. + */ + bs_dst->nr = bs_src->nr; + + /* + * Early exit when there is nothing to copy. + */ + if (!bs_src->nr) + return; + + /* + * As bs_src->entries is a circular buffer, we need to copy from it in + * two steps. First, copy the branches from the most recently inserted + * branch ->last_branch_pos until the end of bs_src->entries buffer. + */ + nr = etmq->etm->synth_opts.last_branch_sz - etmq->last_branch_pos; + memcpy(&bs_dst->entries[0], + &bs_src->entries[etmq->last_branch_pos], + sizeof(struct branch_entry) * nr); + + /* + * If we wrapped around at least once, the branches from the beginning + * of the bs_src->entries buffer and until the ->last_branch_pos element + * are older valid branches: copy them over. The total number of + * branches copied over will be equal to the number of branches asked by + * the user in last_branch_sz. + */ + if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) { + memcpy(&bs_dst->entries[nr], + &bs_src->entries[0], + sizeof(struct branch_entry) * etmq->last_branch_pos); + } +} + +static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq) +{ + etmq->last_branch_pos = 0; + etmq->last_branch_rb->nr = 0; +} + +static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet) +{ + /* + * The packet records the execution range with an exclusive end address + * + * A64 instructions are constant size, so the last executed + * instruction is A64_INSTR_SIZE before the end address + * Will need to do instruction level decode for T32 instructions as + * they can be variable size (not yet supported). + */ + return packet->end_addr - A64_INSTR_SIZE; +} + +static inline u64 cs_etm__instr_count(const struct cs_etm_packet *packet) +{ + /* + * Only A64 instructions are currently supported, so can get + * instruction count by dividing. + * Will need to do instruction level decode for T32 instructions as + * they can be variable size (not yet supported). + */ + return (packet->end_addr - packet->start_addr) / A64_INSTR_SIZE; +} + +static inline u64 cs_etm__instr_addr(const struct cs_etm_packet *packet, + u64 offset) +{ + /* + * Only A64 instructions are currently supported, so can get + * instruction address by muliplying. + * Will need to do instruction level decode for T32 instructions as + * they can be variable size (not yet supported). + */ + return packet->start_addr + offset * A64_INSTR_SIZE; +} + +static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq) +{ + struct branch_stack *bs = etmq->last_branch_rb; + struct branch_entry *be; + + /* + * The branches are recorded in a circular buffer in reverse + * chronological order: we start recording from the last element of the + * buffer down. 
After writing the first element of the stack, move the + * insert position back to the end of the buffer. + */ + if (!etmq->last_branch_pos) + etmq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz; + + etmq->last_branch_pos -= 1; + + be = &bs->entries[etmq->last_branch_pos]; + be->from = cs_etm__last_executed_instr(etmq->prev_packet); + be->to = etmq->packet->start_addr; + /* No support for mispredict */ + be->flags.mispred = 0; + be->flags.predicted = 1; + + /* + * Increment bs->nr until reaching the number of last branches asked by + * the user on the command line. + */ + if (bs->nr < etmq->etm->synth_opts.last_branch_sz) + bs->nr += 1; +} + +static int cs_etm__inject_event(union perf_event *event, + struct perf_sample *sample, u64 type) +{ + event->header.size = perf_event__sample_event_size(sample, type, 0); + return perf_event__synthesize_sample(event, type, 0, sample); +} + + static int cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq) { @@ -459,35 +633,105 @@ static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, } } +static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, + u64 addr, u64 period) +{ + int ret = 0; + struct cs_etm_auxtrace *etm = etmq->etm; + union perf_event *event = etmq->event_buf; + struct perf_sample sample = {.ip = 0,}; + + event->sample.header.type = PERF_RECORD_SAMPLE; + event->sample.header.misc = PERF_RECORD_MISC_USER; + event->sample.header.size = sizeof(struct perf_event_header); + + sample.ip = addr; + sample.pid = etmq->pid; + sample.tid = etmq->tid; + sample.id = etmq->etm->instructions_id; + sample.stream_id = etmq->etm->instructions_id; + sample.period = period; + sample.cpu = etmq->packet->cpu; + sample.flags = 0; + sample.insn_len = 1; + sample.cpumode = event->header.misc; + + if (etm->synth_opts.last_branch) { + cs_etm__copy_last_branch_rb(etmq); + sample.branch_stack = etmq->last_branch; + } + + if (etm->synth_opts.inject) { + ret = cs_etm__inject_event(event, &sample, + etm->instructions_sample_type); + if (ret) + return ret; + } + + ret = perf_session__deliver_synth_event(etm->session, event, &sample); + + if (ret) + pr_err( + "CS ETM Trace: failed to deliver instruction event, error %d\n", + ret); + + if (etm->synth_opts.last_branch) + cs_etm__reset_last_branch_rb(etmq); + + return ret; +} + /* * The cs etm packet encodes an instruction range between a branch target * and the next taken branch. Generate sample accordingly. 
*/ -static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, - struct cs_etm_packet *packet) +static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq) { int ret = 0; struct cs_etm_auxtrace *etm = etmq->etm; struct perf_sample sample = {.ip = 0,}; union perf_event *event = etmq->event_buf; - u64 start_addr = packet->start_addr; - u64 end_addr = packet->end_addr; + struct dummy_branch_stack { + u64 nr; + struct branch_entry entries; + } dummy_bs; event->sample.header.type = PERF_RECORD_SAMPLE; event->sample.header.misc = PERF_RECORD_MISC_USER; event->sample.header.size = sizeof(struct perf_event_header); - sample.ip = start_addr; + sample.ip = cs_etm__last_executed_instr(etmq->prev_packet); sample.pid = etmq->pid; sample.tid = etmq->tid; - sample.addr = end_addr; + sample.addr = etmq->packet->start_addr; sample.id = etmq->etm->branches_id; sample.stream_id = etmq->etm->branches_id; sample.period = 1; - sample.cpu = packet->cpu; + sample.cpu = etmq->packet->cpu; sample.flags = 0; sample.cpumode = PERF_RECORD_MISC_USER; + /* + * perf report cannot handle events without a branch stack + */ + if (etm->synth_opts.last_branch) { + dummy_bs = (struct dummy_branch_stack){ + .nr = 1, + .entries = { + .from = sample.ip, + .to = sample.addr, + }, + }; + sample.branch_stack = (struct branch_stack *)&dummy_bs; + } + + if (etm->synth_opts.inject) { + ret = cs_etm__inject_event(event, &sample, + etm->branches_sample_type); + if (ret) + return ret; + } + ret = perf_session__deliver_synth_event(etm->session, event, &sample); if (ret) @@ -584,6 +828,24 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, etm->sample_branches = true; etm->branches_sample_type = attr.sample_type; etm->branches_id = id; + id += 1; + attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR; + } + + if (etm->synth_opts.last_branch) + attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; + + if (etm->synth_opts.instructions) { + attr.config = PERF_COUNT_HW_INSTRUCTIONS; + attr.sample_period = etm->synth_opts.period; + etm->instructions_sample_period = attr.sample_period; + err = cs_etm__synth_event(session, &attr, id); + if (err) + return err; + etm->sample_instructions = true; + etm->instructions_sample_type = attr.sample_type; + etm->instructions_id = id; + id += 1; } return 0; @@ -591,20 +853,68 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, static int cs_etm__sample(struct cs_etm_queue *etmq) { + struct cs_etm_auxtrace *etm = etmq->etm; + struct cs_etm_packet *tmp; int ret; - struct cs_etm_packet packet; + u64 instrs_executed; - while (1) { - ret = cs_etm_decoder__get_packet(etmq->decoder, &packet); - if (ret <= 0) + instrs_executed = cs_etm__instr_count(etmq->packet); + etmq->period_instructions += instrs_executed; + + /* + * Record a branch when the last instruction in + * PREV_PACKET is a branch. 
+ */ + if (etm->synth_opts.last_branch && + etmq->prev_packet && + etmq->prev_packet->last_instr_taken_branch) + cs_etm__update_last_branch_rb(etmq); + + if (etm->sample_instructions && + etmq->period_instructions >= etm->instructions_sample_period) { + /* + * Emit instruction sample periodically + * TODO: allow period to be defined in cycles and clock time + */ + + /* Get number of instructions executed after the sample point */ + u64 instrs_over = etmq->period_instructions - + etm->instructions_sample_period; + + /* + * Calculate the address of the sampled instruction (-1 as + * sample is reported as though instruction has just been + * executed, but PC has not advanced to next instruction) + */ + u64 offset = (instrs_executed - instrs_over - 1); + u64 addr = cs_etm__instr_addr(etmq->packet, offset); + + ret = cs_etm__synth_instruction_sample( + etmq, addr, etm->instructions_sample_period); + if (ret) + return ret; + + /* Carry remaining instructions into next sample period */ + etmq->period_instructions = instrs_over; + } + + if (etm->sample_branches && + etmq->prev_packet && + etmq->prev_packet->sample_type == CS_ETM_RANGE && + etmq->prev_packet->last_instr_taken_branch) { + ret = cs_etm__synth_branch_sample(etmq); + if (ret) return ret; + } + if (etm->sample_branches || etm->synth_opts.last_branch) { /* - * If the packet contains an instruction range, generate an - * instruction sequence event. + * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for + * the next incoming packet. */ - if (packet.sample_type & CS_ETM_RANGE) - cs_etm__synth_branch_sample(etmq, &packet); + tmp = etmq->packet; + etmq->packet = etmq->prev_packet; + etmq->prev_packet = tmp; } return 0; @@ -621,45 +931,73 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq) etm->kernel_start = machine__kernel_start(etm->machine); /* Go through each buffer in the queue and decode them one by one */ -more: - buffer_used = 0; - memset(&buffer, 0, sizeof(buffer)); - err = cs_etm__get_trace(&buffer, etmq); - if (err <= 0) - return err; - /* - * We cannot assume consecutive blocks in the data file are contiguous, - * reset the decoder to force re-sync. - */ - err = cs_etm_decoder__reset(etmq->decoder); - if (err != 0) - return err; - - /* Run trace decoder until buffer consumed or end of trace */ - do { - processed = 0; - - err = cs_etm_decoder__process_data_block( - etmq->decoder, - etmq->offset, - &buffer.buf[buffer_used], - buffer.len - buffer_used, - &processed); - - if (err) + while (1) { + buffer_used = 0; + memset(&buffer, 0, sizeof(buffer)); + err = cs_etm__get_trace(&buffer, etmq); + if (err <= 0) + return err; + /* + * We cannot assume consecutive blocks in the data file are + * contiguous, reset the decoder to force re-sync. + */ + err = cs_etm_decoder__reset(etmq->decoder); + if (err != 0) return err; - etmq->offset += processed; - buffer_used += processed; + /* Run trace decoder until buffer consumed or end of trace */ + do { + processed = 0; + err = cs_etm_decoder__process_data_block( + etmq->decoder, + etmq->offset, + &buffer.buf[buffer_used], + buffer.len - buffer_used, + &processed); + if (err) + return err; + + etmq->offset += processed; + buffer_used += processed; + + /* Process each packet in this chunk */ + while (1) { + err = cs_etm_decoder__get_packet(etmq->decoder, + etmq->packet); + if (err <= 0) + /* + * Stop processing this chunk on + * end of data or error + */ + break; + + /* + * If the packet contains an instruction + * range, generate instruction sequence + * events. 
+ */ + if (etmq->packet->sample_type & CS_ETM_RANGE) + err = cs_etm__sample(etmq); + } + } while (buffer.len > buffer_used); /* - * Nothing to do with an error condition, let's hope the next - * chunk will be better. + * Generate a last branch event for the branches left in + * the circular buffer at the end of the trace. */ - err = cs_etm__sample(etmq); - } while (buffer.len > buffer_used); + if (etm->sample_instructions && + etmq->etm->synth_opts.last_branch) { + struct branch_stack *bs = etmq->last_branch_rb; + struct branch_entry *be = + &bs->entries[etmq->last_branch_pos]; + + err = cs_etm__synth_instruction_sample( + etmq, be->to, etmq->period_instructions); + if (err) + return err; + } -goto more; + } return err; } -- cgit v1.2.3 From 256e751cac78739a4de2232450d3681b68b5845e Mon Sep 17 00:00:00 2001 From: Robert Walker Date: Wed, 14 Feb 2018 11:24:40 +0000 Subject: perf inject: Emit instruction records on ETM trace discontinuity There may be discontinuities in the ETM trace stream due to overflows or ETM configuration for selective trace. This patch emits an instruction sample with the pending branch stack when a TRACE ON packet occurs indicating a discontinuity in the trace data. A new packet type CS_ETM_TRACE_ON is added, which is emitted by the low level decoder when a TRACE ON occurs. The higher level decoder flushes the branch stack when this packet is emitted. Signed-off-by: Robert Walker Acked-by: Mathieu Poirier Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1518607481-4059-3-git-send-email-robert.walker@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 9 +++ tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 1 + tools/perf/util/cs-etm.c | 80 ++++++++++++++++++------- 3 files changed, 67 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 8ff69dfd725a..640af88331b4 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -328,7 +328,14 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder, } return ret; +} +static ocsd_datapath_resp_t +cs_etm_decoder__buffer_trace_on(struct cs_etm_decoder *decoder, + const uint8_t trace_chan_id) +{ + return cs_etm_decoder__buffer_packet(decoder, trace_chan_id, + CS_ETM_TRACE_ON); } static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( @@ -347,6 +354,8 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( decoder->trace_on = false; break; case OCSD_GEN_TRC_ELEM_TRACE_ON: + resp = cs_etm_decoder__buffer_trace_on(decoder, + trace_chan_id); decoder->trace_on = true; break; case OCSD_GEN_TRC_ELEM_INSTR_RANGE: diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index a4fdd285b145..743f5f444304 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -24,6 +24,7 @@ struct cs_etm_buffer { enum cs_etm_sample_type { CS_ETM_RANGE = 1 << 0, + CS_ETM_TRACE_ON = 1 << 1, }; struct cs_etm_packet { diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 6e595d96c04d..1b0d422373be 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -867,6 +867,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq) */ if (etm->synth_opts.last_branch && etmq->prev_packet && + 
etmq->prev_packet->sample_type == CS_ETM_RANGE && etmq->prev_packet->last_instr_taken_branch) cs_etm__update_last_branch_rb(etmq); @@ -920,6 +921,40 @@ static int cs_etm__sample(struct cs_etm_queue *etmq) return 0; } +static int cs_etm__flush(struct cs_etm_queue *etmq) +{ + int err = 0; + struct cs_etm_packet *tmp; + + if (etmq->etm->synth_opts.last_branch && + etmq->prev_packet && + etmq->prev_packet->sample_type == CS_ETM_RANGE) { + /* + * Generate a last branch event for the branches left in the + * circular buffer at the end of the trace. + * + * Use the address of the end of the last reported execution + * range + */ + u64 addr = cs_etm__last_executed_instr(etmq->prev_packet); + + err = cs_etm__synth_instruction_sample( + etmq, addr, + etmq->period_instructions); + etmq->period_instructions = 0; + + /* + * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for + * the next incoming packet. + */ + tmp = etmq->packet; + etmq->packet = etmq->prev_packet; + etmq->prev_packet = tmp; + } + + return err; +} + static int cs_etm__run_decoder(struct cs_etm_queue *etmq) { struct cs_etm_auxtrace *etm = etmq->etm; @@ -971,32 +1006,31 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq) */ break; - /* - * If the packet contains an instruction - * range, generate instruction sequence - * events. - */ - if (etmq->packet->sample_type & CS_ETM_RANGE) - err = cs_etm__sample(etmq); + switch (etmq->packet->sample_type) { + case CS_ETM_RANGE: + /* + * If the packet contains an instruction + * range, generate instruction sequence + * events. + */ + cs_etm__sample(etmq); + break; + case CS_ETM_TRACE_ON: + /* + * Discontinuity in trace, flush + * previous branch stack + */ + cs_etm__flush(etmq); + break; + default: + break; + } } } while (buffer.len > buffer_used); - /* - * Generate a last branch event for the branches left in - * the circular buffer at the end of the trace. - */ - if (etm->sample_instructions && - etmq->etm->synth_opts.last_branch) { - struct branch_stack *bs = etmq->last_branch_rb; - struct branch_entry *be = - &bs->entries[etmq->last_branch_pos]; - - err = cs_etm__synth_instruction_sample( - etmq, be->to, etmq->period_instructions); - if (err) - return err; - } - + if (err == 0) + /* Flush any remaining branch stack entries */ + err = cs_etm__flush(etmq); } return err; -- cgit v1.2.3 From fc2f52379b9d2232487d5e1dadb77883c10cdf47 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 9 Feb 2018 08:39:09 -0800 Subject: perf report: Fix description for --mem-mode The "mem-loads" event only works when PEBS is enabled, so add the "/p" ("precise") suffix to the examples. Signed-off-by: Andi Kleen Cc: Jiri Olsa LPU-Reference: 20180209163909.9240-1-andi@firstfloor.org Link: https://lkml.kernel.org/n/tip-v0gcd4u9tktrvjjsp6y7ouv4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index a76b871f78a6..cba16d8a970e 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -368,7 +368,7 @@ OPTIONS Use the data addresses of samples in addition to instruction addresses to build the histograms. To generate meaningful output, the perf.data file must have been obtained using perf record -d -W and using a - special event -e cpu/mem-loads/ or -e cpu/mem-stores/. See + special event -e cpu/mem-loads/p or -e cpu/mem-stores/p. 
See 'perf mem' for simpler access. --percent-limit:: -- cgit v1.2.3 From b40982e8468b46b8f7f5bba5a7e541ec04a29d7d Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Mon, 29 Jan 2018 18:57:53 +0800 Subject: perf report: Fix wrong jump arrow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we use perf report interactive annotate view, we can see the position of jump arrow is not correct. For example, 1. perf record -b ... 2. perf report 3. In interactive mode, select Annotate 'function' Percent│ IPC Cycle │ if (flag) 1.37 │0.4┌── 1 ↓ je 82 │ │ x += x / y + y / x; 0.00 │0.4│ 1310 movsd (%rsp),%xmm0 0.00 │0.4│ 565 movsd 0x8(%rsp),%xmm4 │0.4│ movsd 0x8(%rsp),%xmm1 │0.4│ movsd (%rsp),%xmm3 │0.4│ divsd %xmm4,%xmm0 0.00 │0.4│ 579 divsd %xmm3,%xmm1 │0.4│ movsd (%rsp),%xmm2 │0.4│ addsd %xmm1,%xmm0 │0.4│ addsd %xmm2,%xmm0 0.00 │0.4│ movsd %xmm0,(%rsp) │ │ volatile double x = 1212121212, y = 121212; │ │ │ │ s_randseed = time(0); │ │ srand(s_randseed); │ │ │ │ for (i = 0; i < 2000000000; i++) { 1.37 │0.4└─→ 82: sub $0x1,%ebx 28.21 │0.48 17 ↑ jne 38 The jump arrow in above example is not correct. It should add the width of IPC and Cycle. With this patch, the result is: Percent│ IPC Cycle │ if (flag) 1.37 │0.48 1 ┌──je 82 │ │ x += x / y + y / x; 0.00 │0.48 1310 │ movsd (%rsp),%xmm0 0.00 │0.48 565 │ movsd 0x8(%rsp),%xmm4 │0.48 │ movsd 0x8(%rsp),%xmm1 │0.48 │ movsd (%rsp),%xmm3 │0.48 │ divsd %xmm4,%xmm0 0.00 │0.48 579 │ divsd %xmm3,%xmm1 │0.48 │ movsd (%rsp),%xmm2 │0.48 │ addsd %xmm1,%xmm0 │0.48 │ addsd %xmm2,%xmm0 0.00 │0.48 │ movsd %xmm0,(%rsp) │ │ volatile double x = 1212121212, y = 121212; │ │ │ │ s_randseed = time(0); │ │ srand(s_randseed); │ │ │ │ for (i = 0; i < 2000000000; i++) { 1.37 │0.48 82:└─→sub $0x1,%ebx 28.21 │0.48 17 ↑ jne 38 Committer notes: Please note that only from LBRv5 (according to Jiri) onwards, i.e. >= Skylake is that we'll have the cycles counts in each branch record entry, so to see the Cycles and IPC columns, and be able to test this patch, one need a capable hardware. While applying this I first tested it on a Broadwell class machine and couldn't get those columns, will add code to the annotate browser to warn the user about that, i.e. you have branch records, but no cycles, use a more recent hardware to get the cycles and IPC columns. 
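The fix itself is small: when the IPC and Cycle columns are displayed, their combined width is added to the column at which the jump arrow is drawn; condensed from the annotate.c hunk below:

	int width = 0;

	if (ab->have_cycles)
		width = IPC_WIDTH + CYCLES_WIDTH;	/* extra columns before the code */

	__ui_browser__line_arrow(browser,
				 pcnt_width + 2 + ab->addr_width + width,
				 from, to);

The fused-instruction marker gets the same offset adjustment.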
Signed-off-by: Jin Yao Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1517223473-14750-1-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 286427975112..e2f666391ac4 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -319,6 +319,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) struct map_symbol *ms = ab->b.priv; struct symbol *sym = ms->sym; u8 pcnt_width = annotate_browser__pcnt_width(ab); + int width = 0; /* PLT symbols contain external offsets */ if (strstr(sym->name, "@plt")) @@ -340,13 +341,17 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) to = (u64)btarget->idx; } + if (ab->have_cycles) + width = IPC_WIDTH + CYCLES_WIDTH; + ui_browser__set_color(browser, HE_COLORSET_JUMP_ARROWS); - __ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width, + __ui_browser__line_arrow(browser, + pcnt_width + 2 + ab->addr_width + width, from, to); if (is_fused(ab, cursor)) { ui_browser__mark_fused(browser, - pcnt_width + 3 + ab->addr_width, + pcnt_width + 3 + ab->addr_width + width, from - 1, to > from ? true : false); } -- cgit v1.2.3 From e3ebaa465136ecfedf9c6f4671df02bf625f8125 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 16 Feb 2018 13:36:19 +0100 Subject: perf report: Fix memory corruption in --branch-history mode --branch-history Jin Yao reported memory corrupton in perf report with branch info used for stack trace: > Following command lines will cause perf crash. > perf record -j call -g -a > perf report --branch-history > > *** Error in `perf': double free or corruption (!prev): 0x00000000104aa040 *** > ======= Backtrace: ========= > /lib/x86_64-linux-gnu/libc.so.6(+0x77725)[0x7f6b37254725] > /lib/x86_64-linux-gnu/libc.so.6(+0x7ff4a)[0x7f6b3725cf4a] > /lib/x86_64-linux-gnu/libc.so.6(cfree+0x4c)[0x7f6b37260abc] > perf[0x51b914] > perf(hist_entry_iter__add+0x1e5)[0x51f305] > perf[0x43cf01] > perf[0x4fa3bf] > perf[0x4fa923] > perf[0x4fd396] > perf[0x4f9614] > perf(perf_session__process_events+0x89e)[0x4fc38e] > perf(cmd_report+0x15d2)[0x43f202] > perf[0x4a059f] > perf(main+0x631)[0x427b71] > /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f6b371fd830] > perf(_start+0x29)[0x427d89] For the cumulative output, we allocate the he_cache array based on the --max-stack option value and populate it with data from 'callchain_cursor'. The --max-stack option value does not ensure now the limit for number of callchain_cursor nodes, so the cumulative iter code will allocate smaller array than it's actually needed and cause above corruption. I think the --max-stack limit does not apply here anyway, because we add callchain data as normal hist entries, while the --max-stack control the limit of single entry callchain depth. Using the callchain_cursor.nr as he_cache array count to fix this. Also removing struct hist_entry_iter::max_stack, because there's no longer any use for it. We need more fixes to ensure that the branch stack code follows properly the logic of --max-stack, which is not the case at the moment. 
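The key change is in iter_prepare_cumulative_entry(): size the he_cache array from the number of nodes actually present in the callchain cursor rather than from --max-stack; taken from the hist.c hunk below:

	/* One slot per callchain cursor node, plus the entry itself */
	he_cache = malloc(sizeof(*he_cache) * (callchain_cursor.nr + 1));
	if (he_cache == NULL)
		return -ENOMEM;

The now-unused hist_entry_iter::max_stack field is removed along with it.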
Original-patch-by: Jin Yao Signed-off-by: Jiri Olsa Reported-by: Jin Yao Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180216123619.GA9945@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 4 +--- tools/perf/util/hist.h | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index b6140950301e..44a8456cea10 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -879,7 +879,7 @@ iter_prepare_cumulative_entry(struct hist_entry_iter *iter, * cumulated only one time to prevent entries more than 100% * overhead. */ - he_cache = malloc(sizeof(*he_cache) * (iter->max_stack + 1)); + he_cache = malloc(sizeof(*he_cache) * (callchain_cursor.nr + 1)); if (he_cache == NULL) return -ENOMEM; @@ -1045,8 +1045,6 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, if (err) return err; - iter->max_stack = max_stack_depth; - err = iter->ops->prepare_entry(iter, al); if (err) goto out; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 02721b579746..e869cad4d89f 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -107,7 +107,6 @@ struct hist_entry_iter { int curr; bool hide_unresolved; - int max_stack; struct perf_evsel *evsel; struct perf_sample *sample; -- cgit v1.2.3 From 1350fb7d1b484afc1556579fb27400d036683453 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 29 Jan 2018 14:04:15 +0530 Subject: tools include powerpc: Grab a copy of arch/powerpc/include/uapi/asm/unistd.h Will be used for generating the syscall id/string translation table. Committer notes: Update it already to catch with these csets applied since Ravi first submitted this patch: 3350eb2ea127 powerpc: sys_pkey_mprotect() system call 9499ec1b5e82 powerpc: sys_pkey_alloc() and sys_pkey_free() system calls So now 'perf trace' on ppc now knows about the pkey_ syscals. Signed-off-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Michael Ellerman Cc: Namhyung Kim Cc: Thomas Richter Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/20180129083417.31240-2-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/powerpc/include/uapi/asm/unistd.h | 402 +++++++++++++++++++++++++++ tools/perf/check-headers.sh | 1 + 2 files changed, 403 insertions(+) create mode 100644 tools/arch/powerpc/include/uapi/asm/unistd.h (limited to 'tools') diff --git a/tools/arch/powerpc/include/uapi/asm/unistd.h b/tools/arch/powerpc/include/uapi/asm/unistd.h new file mode 100644 index 000000000000..389c36fd8299 --- /dev/null +++ b/tools/arch/powerpc/include/uapi/asm/unistd.h @@ -0,0 +1,402 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * This file contains the system call numbers. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#ifndef _UAPI_ASM_POWERPC_UNISTD_H_ +#define _UAPI_ASM_POWERPC_UNISTD_H_ + + +#define __NR_restart_syscall 0 +#define __NR_exit 1 +#define __NR_fork 2 +#define __NR_read 3 +#define __NR_write 4 +#define __NR_open 5 +#define __NR_close 6 +#define __NR_waitpid 7 +#define __NR_creat 8 +#define __NR_link 9 +#define __NR_unlink 10 +#define __NR_execve 11 +#define __NR_chdir 12 +#define __NR_time 13 +#define __NR_mknod 14 +#define __NR_chmod 15 +#define __NR_lchown 16 +#define __NR_break 17 +#define __NR_oldstat 18 +#define __NR_lseek 19 +#define __NR_getpid 20 +#define __NR_mount 21 +#define __NR_umount 22 +#define __NR_setuid 23 +#define __NR_getuid 24 +#define __NR_stime 25 +#define __NR_ptrace 26 +#define __NR_alarm 27 +#define __NR_oldfstat 28 +#define __NR_pause 29 +#define __NR_utime 30 +#define __NR_stty 31 +#define __NR_gtty 32 +#define __NR_access 33 +#define __NR_nice 34 +#define __NR_ftime 35 +#define __NR_sync 36 +#define __NR_kill 37 +#define __NR_rename 38 +#define __NR_mkdir 39 +#define __NR_rmdir 40 +#define __NR_dup 41 +#define __NR_pipe 42 +#define __NR_times 43 +#define __NR_prof 44 +#define __NR_brk 45 +#define __NR_setgid 46 +#define __NR_getgid 47 +#define __NR_signal 48 +#define __NR_geteuid 49 +#define __NR_getegid 50 +#define __NR_acct 51 +#define __NR_umount2 52 +#define __NR_lock 53 +#define __NR_ioctl 54 +#define __NR_fcntl 55 +#define __NR_mpx 56 +#define __NR_setpgid 57 +#define __NR_ulimit 58 +#define __NR_oldolduname 59 +#define __NR_umask 60 +#define __NR_chroot 61 +#define __NR_ustat 62 +#define __NR_dup2 63 +#define __NR_getppid 64 +#define __NR_getpgrp 65 +#define __NR_setsid 66 +#define __NR_sigaction 67 +#define __NR_sgetmask 68 +#define __NR_ssetmask 69 +#define __NR_setreuid 70 +#define __NR_setregid 71 +#define __NR_sigsuspend 72 +#define __NR_sigpending 73 +#define __NR_sethostname 74 +#define __NR_setrlimit 75 +#define __NR_getrlimit 76 +#define __NR_getrusage 77 +#define __NR_gettimeofday 78 +#define __NR_settimeofday 79 +#define __NR_getgroups 80 +#define __NR_setgroups 81 +#define __NR_select 82 +#define __NR_symlink 83 +#define __NR_oldlstat 84 +#define __NR_readlink 85 +#define __NR_uselib 86 +#define __NR_swapon 87 +#define __NR_reboot 88 +#define __NR_readdir 89 +#define __NR_mmap 90 +#define __NR_munmap 91 +#define __NR_truncate 92 +#define __NR_ftruncate 93 +#define __NR_fchmod 94 +#define __NR_fchown 95 +#define __NR_getpriority 96 +#define __NR_setpriority 97 +#define __NR_profil 98 +#define __NR_statfs 99 +#define __NR_fstatfs 100 +#define __NR_ioperm 101 +#define __NR_socketcall 102 +#define __NR_syslog 103 +#define __NR_setitimer 104 +#define __NR_getitimer 105 +#define __NR_stat 106 +#define __NR_lstat 107 +#define __NR_fstat 108 +#define __NR_olduname 109 +#define __NR_iopl 110 +#define __NR_vhangup 111 +#define __NR_idle 112 +#define __NR_vm86 113 +#define __NR_wait4 114 +#define __NR_swapoff 115 +#define __NR_sysinfo 116 +#define __NR_ipc 117 +#define __NR_fsync 118 +#define __NR_sigreturn 119 +#define __NR_clone 120 +#define __NR_setdomainname 121 +#define __NR_uname 122 +#define __NR_modify_ldt 123 +#define __NR_adjtimex 124 +#define __NR_mprotect 125 +#define __NR_sigprocmask 126 +#define __NR_create_module 127 +#define __NR_init_module 128 +#define __NR_delete_module 129 +#define __NR_get_kernel_syms 130 +#define __NR_quotactl 131 +#define __NR_getpgid 132 +#define __NR_fchdir 133 +#define __NR_bdflush 134 +#define __NR_sysfs 135 +#define __NR_personality 136 +#define __NR_afs_syscall 137 /* Syscall for Andrew File System */ 
+#define __NR_setfsuid 138 +#define __NR_setfsgid 139 +#define __NR__llseek 140 +#define __NR_getdents 141 +#define __NR__newselect 142 +#define __NR_flock 143 +#define __NR_msync 144 +#define __NR_readv 145 +#define __NR_writev 146 +#define __NR_getsid 147 +#define __NR_fdatasync 148 +#define __NR__sysctl 149 +#define __NR_mlock 150 +#define __NR_munlock 151 +#define __NR_mlockall 152 +#define __NR_munlockall 153 +#define __NR_sched_setparam 154 +#define __NR_sched_getparam 155 +#define __NR_sched_setscheduler 156 +#define __NR_sched_getscheduler 157 +#define __NR_sched_yield 158 +#define __NR_sched_get_priority_max 159 +#define __NR_sched_get_priority_min 160 +#define __NR_sched_rr_get_interval 161 +#define __NR_nanosleep 162 +#define __NR_mremap 163 +#define __NR_setresuid 164 +#define __NR_getresuid 165 +#define __NR_query_module 166 +#define __NR_poll 167 +#define __NR_nfsservctl 168 +#define __NR_setresgid 169 +#define __NR_getresgid 170 +#define __NR_prctl 171 +#define __NR_rt_sigreturn 172 +#define __NR_rt_sigaction 173 +#define __NR_rt_sigprocmask 174 +#define __NR_rt_sigpending 175 +#define __NR_rt_sigtimedwait 176 +#define __NR_rt_sigqueueinfo 177 +#define __NR_rt_sigsuspend 178 +#define __NR_pread64 179 +#define __NR_pwrite64 180 +#define __NR_chown 181 +#define __NR_getcwd 182 +#define __NR_capget 183 +#define __NR_capset 184 +#define __NR_sigaltstack 185 +#define __NR_sendfile 186 +#define __NR_getpmsg 187 /* some people actually want streams */ +#define __NR_putpmsg 188 /* some people actually want streams */ +#define __NR_vfork 189 +#define __NR_ugetrlimit 190 /* SuS compliant getrlimit */ +#define __NR_readahead 191 +#ifndef __powerpc64__ /* these are 32-bit only */ +#define __NR_mmap2 192 +#define __NR_truncate64 193 +#define __NR_ftruncate64 194 +#define __NR_stat64 195 +#define __NR_lstat64 196 +#define __NR_fstat64 197 +#endif +#define __NR_pciconfig_read 198 +#define __NR_pciconfig_write 199 +#define __NR_pciconfig_iobase 200 +#define __NR_multiplexer 201 +#define __NR_getdents64 202 +#define __NR_pivot_root 203 +#ifndef __powerpc64__ +#define __NR_fcntl64 204 +#endif +#define __NR_madvise 205 +#define __NR_mincore 206 +#define __NR_gettid 207 +#define __NR_tkill 208 +#define __NR_setxattr 209 +#define __NR_lsetxattr 210 +#define __NR_fsetxattr 211 +#define __NR_getxattr 212 +#define __NR_lgetxattr 213 +#define __NR_fgetxattr 214 +#define __NR_listxattr 215 +#define __NR_llistxattr 216 +#define __NR_flistxattr 217 +#define __NR_removexattr 218 +#define __NR_lremovexattr 219 +#define __NR_fremovexattr 220 +#define __NR_futex 221 +#define __NR_sched_setaffinity 222 +#define __NR_sched_getaffinity 223 +/* 224 currently unused */ +#define __NR_tuxcall 225 +#ifndef __powerpc64__ +#define __NR_sendfile64 226 +#endif +#define __NR_io_setup 227 +#define __NR_io_destroy 228 +#define __NR_io_getevents 229 +#define __NR_io_submit 230 +#define __NR_io_cancel 231 +#define __NR_set_tid_address 232 +#define __NR_fadvise64 233 +#define __NR_exit_group 234 +#define __NR_lookup_dcookie 235 +#define __NR_epoll_create 236 +#define __NR_epoll_ctl 237 +#define __NR_epoll_wait 238 +#define __NR_remap_file_pages 239 +#define __NR_timer_create 240 +#define __NR_timer_settime 241 +#define __NR_timer_gettime 242 +#define __NR_timer_getoverrun 243 +#define __NR_timer_delete 244 +#define __NR_clock_settime 245 +#define __NR_clock_gettime 246 +#define __NR_clock_getres 247 +#define __NR_clock_nanosleep 248 +#define __NR_swapcontext 249 +#define __NR_tgkill 250 +#define __NR_utimes 251 +#define 
__NR_statfs64 252 +#define __NR_fstatfs64 253 +#ifndef __powerpc64__ +#define __NR_fadvise64_64 254 +#endif +#define __NR_rtas 255 +#define __NR_sys_debug_setcontext 256 +/* Number 257 is reserved for vserver */ +#define __NR_migrate_pages 258 +#define __NR_mbind 259 +#define __NR_get_mempolicy 260 +#define __NR_set_mempolicy 261 +#define __NR_mq_open 262 +#define __NR_mq_unlink 263 +#define __NR_mq_timedsend 264 +#define __NR_mq_timedreceive 265 +#define __NR_mq_notify 266 +#define __NR_mq_getsetattr 267 +#define __NR_kexec_load 268 +#define __NR_add_key 269 +#define __NR_request_key 270 +#define __NR_keyctl 271 +#define __NR_waitid 272 +#define __NR_ioprio_set 273 +#define __NR_ioprio_get 274 +#define __NR_inotify_init 275 +#define __NR_inotify_add_watch 276 +#define __NR_inotify_rm_watch 277 +#define __NR_spu_run 278 +#define __NR_spu_create 279 +#define __NR_pselect6 280 +#define __NR_ppoll 281 +#define __NR_unshare 282 +#define __NR_splice 283 +#define __NR_tee 284 +#define __NR_vmsplice 285 +#define __NR_openat 286 +#define __NR_mkdirat 287 +#define __NR_mknodat 288 +#define __NR_fchownat 289 +#define __NR_futimesat 290 +#ifdef __powerpc64__ +#define __NR_newfstatat 291 +#else +#define __NR_fstatat64 291 +#endif +#define __NR_unlinkat 292 +#define __NR_renameat 293 +#define __NR_linkat 294 +#define __NR_symlinkat 295 +#define __NR_readlinkat 296 +#define __NR_fchmodat 297 +#define __NR_faccessat 298 +#define __NR_get_robust_list 299 +#define __NR_set_robust_list 300 +#define __NR_move_pages 301 +#define __NR_getcpu 302 +#define __NR_epoll_pwait 303 +#define __NR_utimensat 304 +#define __NR_signalfd 305 +#define __NR_timerfd_create 306 +#define __NR_eventfd 307 +#define __NR_sync_file_range2 308 +#define __NR_fallocate 309 +#define __NR_subpage_prot 310 +#define __NR_timerfd_settime 311 +#define __NR_timerfd_gettime 312 +#define __NR_signalfd4 313 +#define __NR_eventfd2 314 +#define __NR_epoll_create1 315 +#define __NR_dup3 316 +#define __NR_pipe2 317 +#define __NR_inotify_init1 318 +#define __NR_perf_event_open 319 +#define __NR_preadv 320 +#define __NR_pwritev 321 +#define __NR_rt_tgsigqueueinfo 322 +#define __NR_fanotify_init 323 +#define __NR_fanotify_mark 324 +#define __NR_prlimit64 325 +#define __NR_socket 326 +#define __NR_bind 327 +#define __NR_connect 328 +#define __NR_listen 329 +#define __NR_accept 330 +#define __NR_getsockname 331 +#define __NR_getpeername 332 +#define __NR_socketpair 333 +#define __NR_send 334 +#define __NR_sendto 335 +#define __NR_recv 336 +#define __NR_recvfrom 337 +#define __NR_shutdown 338 +#define __NR_setsockopt 339 +#define __NR_getsockopt 340 +#define __NR_sendmsg 341 +#define __NR_recvmsg 342 +#define __NR_recvmmsg 343 +#define __NR_accept4 344 +#define __NR_name_to_handle_at 345 +#define __NR_open_by_handle_at 346 +#define __NR_clock_adjtime 347 +#define __NR_syncfs 348 +#define __NR_sendmmsg 349 +#define __NR_setns 350 +#define __NR_process_vm_readv 351 +#define __NR_process_vm_writev 352 +#define __NR_finit_module 353 +#define __NR_kcmp 354 +#define __NR_sched_setattr 355 +#define __NR_sched_getattr 356 +#define __NR_renameat2 357 +#define __NR_seccomp 358 +#define __NR_getrandom 359 +#define __NR_memfd_create 360 +#define __NR_bpf 361 +#define __NR_execveat 362 +#define __NR_switch_endian 363 +#define __NR_userfaultfd 364 +#define __NR_membarrier 365 +#define __NR_mlock2 378 +#define __NR_copy_file_range 379 +#define __NR_preadv2 380 +#define __NR_pwritev2 381 +#define __NR_kexec_file_load 382 +#define __NR_statx 383 +#define __NR_pkey_alloc 
384 +#define __NR_pkey_free 385 +#define __NR_pkey_mprotect 386 + +#endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 790ec25919a0..bf206ffe5c45 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -42,6 +42,7 @@ arch/parisc/include/uapi/asm/errno.h arch/powerpc/include/uapi/asm/errno.h arch/sparc/include/uapi/asm/errno.h arch/x86/include/uapi/asm/errno.h +arch/powerpc/include/uapi/asm/unistd.h include/asm-generic/bitops/arch_hweight.h include/asm-generic/bitops/const_hweight.h include/asm-generic/bitops/__fls.h -- cgit v1.2.3 From 8e2ff72aa35e1a8a53894ae46751a83543338e14 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 29 Jan 2018 14:04:16 +0530 Subject: perf powerpc: Generate system call table from asm/unistd.h This should speed up accessing new system calls introduced with the kernel rather than waiting for libaudit updates to include them. Signed-off-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Michael Ellerman Cc: Namhyung Kim Cc: Thomas Richter Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/20180129083417.31240-3-ravi.bangoria@linux.vnet.ibm.com [ Made it generate syscall_32.c as well to fix the build on 32-bit ppc ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/Makefile | 25 +++++++++++++++ .../perf/arch/powerpc/entry/syscalls/mksyscalltbl | 37 ++++++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100755 tools/perf/arch/powerpc/entry/syscalls/mksyscalltbl (limited to 'tools') diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile index 42dab7c8f508..a111239df182 100644 --- a/tools/perf/arch/powerpc/Makefile +++ b/tools/perf/arch/powerpc/Makefile @@ -6,3 +6,28 @@ endif HAVE_KVM_STAT_SUPPORT := 1 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 PERF_HAVE_JITDUMP := 1 + +# +# Syscall table generation for perf +# + +out := $(OUTPUT)arch/powerpc/include/generated/asm +header32 := $(out)/syscalls_32.c +header64 := $(out)/syscalls_64.c +sysdef := $(srctree)/tools/arch/powerpc/include/uapi/asm/unistd.h +sysprf := $(srctree)/tools/perf/arch/powerpc/entry/syscalls/ +systbl := $(sysprf)/mksyscalltbl + +# Create output directory if not already present +_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') + +$(header64): $(sysdef) $(systbl) + $(Q)$(SHELL) '$(systbl)' '64' '$(CC)' $(sysdef) > $@ + +$(header32): $(sysdef) $(systbl) + $(Q)$(SHELL) '$(systbl)' '32' '$(CC)' $(sysdef) > $@ + +clean:: + $(call QUIET_CLEAN, powerpc) $(RM) $(header32) $(header64) + +archheaders: $(header32) $(header64) diff --git a/tools/perf/arch/powerpc/entry/syscalls/mksyscalltbl b/tools/perf/arch/powerpc/entry/syscalls/mksyscalltbl new file mode 100755 index 000000000000..ef52e1dd694b --- /dev/null +++ b/tools/perf/arch/powerpc/entry/syscalls/mksyscalltbl @@ -0,0 +1,37 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Generate system call table for perf. Derived from +# s390 script. +# +# Copyright IBM Corp. 2017 +# Author(s): Hendrik Brueckner +# Changed by: Ravi Bangoria + +wordsize=$1 +gcc=$2 +input=$3 + +if ! 
test -r $input; then + echo "Could not read input file" >&2 + exit 1 +fi + +create_table() +{ + local wordsize=$1 + local max_nr + + echo "static const char *syscalltbl_powerpc_${wordsize}[] = {" + while read sc nr; do + printf '\t[%d] = "%s",\n' $nr $sc + max_nr=$nr + done + echo '};' + echo "#define SYSCALLTBL_POWERPC_${wordsize}_MAX_ID $max_nr" +} + +$gcc -m${wordsize} -E -dM -x c $input \ + |sed -ne 's/^#define __NR_//p' \ + |sort -t' ' -k2 -nu \ + |create_table ${wordsize} -- cgit v1.2.3 From 4281da235e3de91bb8deae44bc6506336ceaa88a Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 29 Jan 2018 14:04:17 +0530 Subject: perf trace powerpc: Use generated syscall table This should speed up accessing new system calls introduced with the kernel rather than waiting for libaudit updates to include them. It also enables users to specify wildcards, for example, perf trace -e 'open*', just like was already possible on x86 and s390. Signed-off-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Michael Ellerman Cc: Namhyung Kim Cc: Thomas Richter Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/20180129083417.31240-4-ravi.bangoria@linux.vnet.ibm.com [ Do it for ppc32 as well ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 ++ tools/perf/util/syscalltbl.c | 8 ++++++++ 2 files changed, 10 insertions(+) (limited to 'tools') diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 0dfdaa9fa81e..577a5d2988fe 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -27,6 +27,8 @@ NO_SYSCALL_TABLE := 1 # Additional ARCH settings for ppc ifeq ($(SRCARCH),powerpc) NO_PERF_REGS := 0 + NO_SYSCALL_TABLE := 0 + CFLAGS += -I$(OUTPUT)arch/powerpc/include/generated LIBUNWIND_LIBS := -lunwind -lunwind-ppc64 endif diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 303bdb84ab5a..895122d638dd 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -30,6 +30,14 @@ static const char **syscalltbl_native = syscalltbl_x86_64; #include const int syscalltbl_native_max_id = SYSCALLTBL_S390_64_MAX_ID; static const char **syscalltbl_native = syscalltbl_s390_64; +#elif defined(__powerpc64__) +#include +const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_64_MAX_ID; +static const char **syscalltbl_native = syscalltbl_powerpc_64; +#elif defined(__powerpc__) +#include +const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_32_MAX_ID; +static const char **syscalltbl_native = syscalltbl_powerpc_32; #endif struct syscall { -- cgit v1.2.3 From eca0fa28cd0df7369701dbee0e30ddce19c039b8 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 13 Feb 2018 16:14:16 +0100 Subject: perf record: Provide detailed information on s390 CPU When perf record ... is setup to record data, the s390 cpu information was a fixed string "IBM/S390". Replace this string with one containing more information about the machine. The information included in the cpuid is a comma separated list: manufacturer,type,model-capacity,model[,version,authorization] with - manufacturer: up to 16 byte name of the manufacturer (IBM). - type: a four digit number refering to the machine generation. - model-capacitiy: up to 16 characters describing number of cpus etc. - model: up to 16 characters describing model. - version: the CPU-MF counter facility version number, available on LPARs only, omitted on z/VM guests. 
- authorization: the CPU-MF counter facility authorization level, available on LPARs only, omitted on z/VM guests. Before: [root@s8360047 perf]# ./perf record -- sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.001 MB perf.data (4 samples) ] [root@s8360047 perf]# ./perf report --header | fgrep cpuid # cpuid : IBM/S390 [root@s8360047 perf]# After: [root@s35lp76 perf]# ./perf report --header|fgrep cpuid # cpuid : IBM,3906,704,M03,3.5,002f [root@s35lp76 perf]# Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Cc: Heiko Carstens Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20180213151419.80737-1-tmricht@linux.vnet.ibm.com [ Use scnprintf instead of strncat to fix build errors on gcc GNU C99 5.4.0 20160609 -march=zEC12 -m64 -mzarch -ggdb3 -O6 -std=gnu99 -fPIC -fno-omit-frame-pointer -funwind-tables -fstack-protector-all ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/s390/util/header.c | 130 +++++++++++++++++++++++++++++++++++-- 1 file changed, 125 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/s390/util/header.c b/tools/perf/arch/s390/util/header.c index 9fa6c3e5782c..a78064c25ced 100644 --- a/tools/perf/arch/s390/util/header.c +++ b/tools/perf/arch/s390/util/header.c @@ -1,8 +1,9 @@ /* * Implementation of get_cpuid(). * - * Copyright 2014 IBM Corp. + * Copyright IBM Corp. 2014, 2018 * Author(s): Alexander Yarygin + * Thomas Richter * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) @@ -13,16 +14,135 @@ #include #include #include +#include #include "../../util/header.h" +#include "../../util/util.h" + +#define SYSINFO_MANU "Manufacturer:" +#define SYSINFO_TYPE "Type:" +#define SYSINFO_MODEL "Model:" +#define SRVLVL_CPUMF "CPU-MF:" +#define SRVLVL_VERSION "version=" +#define SRVLVL_AUTHORIZATION "authorization=" +#define SYSINFO "/proc/sysinfo" +#define SRVLVL "/proc/service_levels" int get_cpuid(char *buffer, size_t sz) { - const char *cpuid = "IBM/S390"; + char *cp, *line = NULL, *line2; + char type[8], model[33], version[8], manufacturer[32], authorization[8]; + int tpsize = 0, mdsize = 0, vssize = 0, mfsize = 0, atsize = 0; + int read; + unsigned long line_sz; + size_t nbytes; + FILE *sysinfo; + + /* + * Scan /proc/sysinfo line by line and read out values for + * Manufacturer:, Type: and Model:, for example: + * Manufacturer: IBM + * Type: 2964 + * Model: 702 N96 + * The first word is the Model Capacity and the second word is + * Model (can be omitted). Both words have a maximum size of 16 + * bytes. 
+ */ + memset(manufacturer, 0, sizeof(manufacturer)); + memset(type, 0, sizeof(type)); + memset(model, 0, sizeof(model)); + memset(version, 0, sizeof(version)); + memset(authorization, 0, sizeof(authorization)); + + sysinfo = fopen(SYSINFO, "r"); + if (sysinfo == NULL) + return -1; + + while ((read = getline(&line, &line_sz, sysinfo)) != -1) { + if (!strncmp(line, SYSINFO_MANU, strlen(SYSINFO_MANU))) { + line2 = line + strlen(SYSINFO_MANU); + + while ((cp = strtok_r(line2, "\n ", &line2))) { + mfsize += scnprintf(manufacturer + mfsize, + sizeof(manufacturer) - mfsize, "%s", cp); + } + } + + if (!strncmp(line, SYSINFO_TYPE, strlen(SYSINFO_TYPE))) { + line2 = line + strlen(SYSINFO_TYPE); + + while ((cp = strtok_r(line2, "\n ", &line2))) { + tpsize += scnprintf(type + tpsize, + sizeof(type) - tpsize, "%s", cp); + } + } + + if (!strncmp(line, SYSINFO_MODEL, strlen(SYSINFO_MODEL))) { + line2 = line + strlen(SYSINFO_MODEL); + + while ((cp = strtok_r(line2, "\n ", &line2))) { + mdsize += scnprintf(model + mdsize, sizeof(type) - mdsize, + "%s%s", model[0] ? "," : "", cp); + } + break; + } + } + fclose(sysinfo); - if (strlen(cpuid) + 1 > sz) + /* Missing manufacturer, type or model information should not happen */ + if (!manufacturer[0] || !type[0] || !model[0]) return -1; - strcpy(buffer, cpuid); - return 0; + /* + * Scan /proc/service_levels and return the CPU-MF counter facility + * version number and authorization level. + * Optional, does not exist on z/VM guests. + */ + sysinfo = fopen(SRVLVL, "r"); + if (sysinfo == NULL) + goto skip_sysinfo; + while ((read = getline(&line, &line_sz, sysinfo)) != -1) { + if (strncmp(line, SRVLVL_CPUMF, strlen(SRVLVL_CPUMF))) + continue; + + line2 = line + strlen(SRVLVL_CPUMF); + while ((cp = strtok_r(line2, "\n ", &line2))) { + if (!strncmp(cp, SRVLVL_VERSION, + strlen(SRVLVL_VERSION))) { + char *sep = strchr(cp, '='); + + vssize += scnprintf(version + vssize, + sizeof(version) - vssize, "%s", sep + 1); + } + if (!strncmp(cp, SRVLVL_AUTHORIZATION, + strlen(SRVLVL_AUTHORIZATION))) { + char *sep = strchr(cp, '='); + + atsize += scnprintf(authorization + atsize, + sizeof(authorization) - atsize, "%s", sep + 1); + } + } + } + fclose(sysinfo); + +skip_sysinfo: + free(line); + + if (version[0] && authorization[0] ) + nbytes = snprintf(buffer, sz, "%s,%s,%s,%s,%s", + manufacturer, type, model, version, + authorization); + else + nbytes = snprintf(buffer, sz, "%s,%s,%s", manufacturer, type, + model); + return (nbytes >= sz) ? -1 : 0; +} + +char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +{ + char *buf = malloc(128); + + if (buf && get_cpuid(buf, 128) < 0) + zfree(&buf); + return buf; } -- cgit v1.2.3 From c59124fa59757fadc80ad881056a21f98c71b146 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 13 Feb 2018 16:14:17 +0100 Subject: perf annotate: Scan cpuid for s390 and save machine type Scan the cpuid string and extract the type number for later use. 
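The cpuid string produced by get_cpuid() above is plain comma-separated text, so the type number can be pulled out with a single sscanf(). A standalone sketch (hypothetical helper name, not the perf implementation in the diff that follows) assuming the "manufacturer,type,model-capacity,model[,version,authorization]" format documented earlier:

#include <stdio.h>

/* Hypothetical helper: extract the machine type from an s390 cpuid string
 * such as "IBM,3906,704,M03,3.5,002f" (manufacturer,type,...). */
static int cpuid_machine_type(const char *cpuid, unsigned int *type)
{
	/* "%*[^,]" skips the manufacturer field, "%u" reads the type number */
	return sscanf(cpuid, "%*[^,],%u", type) == 1 ? 0 : -1;
}

int main(void)
{
	unsigned int type;

	if (cpuid_machine_type("IBM,3906,704,M03,3.5,002f", &type) == 0)
		printf("machine type: %u\n", type);	/* prints 3906 */
	return 0;
}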
Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Cc: Heiko Carstens Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20180213151419.80737-2-tmricht@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/s390/annotate/instructions.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c index 8c72b44444cb..01df9d8303e1 100644 --- a/tools/perf/arch/s390/annotate/instructions.c +++ b/tools/perf/arch/s390/annotate/instructions.c @@ -23,12 +23,37 @@ static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *na return ops; } +static int s390__cpuid_parse(struct arch *arch, char *cpuid) +{ + unsigned int family; + char model[16], model_c[16], cpumf_v[16], cpumf_a[16]; + int ret; + + /* + * cpuid string format: + * "IBM,family,model-capacity,model[,cpum_cf-version,cpum_cf-authorization]" + */ + ret = sscanf(cpuid, "%*[^,],%u,%[^,],%[^,],%[^,],%s", &family, model_c, + model, cpumf_v, cpumf_a); + if (ret >= 2) { + arch->family = family; + arch->model = 0; + return 0; + } + + return -1; +} + static int s390__annotate_init(struct arch *arch, char *cpuid __maybe_unused) { + int err = 0; + if (!arch->initialized) { arch->initialized = true; arch->associate_instruction_ops = s390__associate_ins_ops; + if (cpuid) + err = s390__cpuid_parse(arch, cpuid); } - return 0; + return err; } -- cgit v1.2.3 From 4cb7d3ecfca90684ad00f893c34a2028fcc5f764 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 13 Feb 2018 16:14:18 +0100 Subject: perf cpuid: Introduce a platform specific cpuid compare function The function get_cpuid_str() is called by perf_pmu__getcpuid() and on s390 returns a complete description of the CPU and its capabilities, which is a comma separated list. To map the CPU type with the value defined in the pmu-events/arch/s390/mapfile.csv, introduce an architecture specific cpuid compare function named strcmp_cpuid_str() The currently used regex algorithm is defined as the weak default and will be used if no platform specific one is defined. This matches the current behavior. Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Cc: Heiko Carstens Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20180213151419.80737-3-tmricht@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/s390/util/header.c | 18 +++++++++++++++ tools/perf/util/header.h | 1 + tools/perf/util/pmu.c | 47 +++++++++++++++++++++++--------------- 3 files changed, 48 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/s390/util/header.c b/tools/perf/arch/s390/util/header.c index a78064c25ced..231294b80dc4 100644 --- a/tools/perf/arch/s390/util/header.c +++ b/tools/perf/arch/s390/util/header.c @@ -146,3 +146,21 @@ char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused) zfree(&buf); return buf; } + +/* + * Compare the cpuid string returned by get_cpuid() function + * with the name generated by the jevents file read from + * pmu-events/arch/s390/mapfile.csv. + * + * Parameter mapcpuid is the cpuid as stored in the + * pmu-events/arch/s390/mapfile.csv. This is just the type number. + * Parameter cpuid is the cpuid returned by function get_cpuid(). 
+ */ +int strcmp_cpuid_str(const char *mapcpuid, const char *cpuid) +{ + char *cp = strchr(cpuid, ','); + + if (cp == NULL) + return -1; + return strncmp(cp + 1, mapcpuid, strlen(mapcpuid)); +} diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index f28aaaa3a440..942bdec6d70d 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -174,4 +174,5 @@ int write_padded(struct feat_fd *fd, const void *bf, int get_cpuid(char *buffer, size_t sz); char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused); +int strcmp_cpuid_str(const char *s1, const char *s2); #endif /* __PERF_HEADER_H */ diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 57e38fdf0b34..1111d5bf15ca 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -576,6 +576,34 @@ char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused) return NULL; } +/* Return zero when the cpuid from the mapfile.csv matches the + * cpuid string generated on this platform. + * Otherwise return non-zero. + */ +int __weak strcmp_cpuid_str(const char *mapcpuid, const char *cpuid) +{ + regex_t re; + regmatch_t pmatch[1]; + int match; + + if (regcomp(&re, mapcpuid, REG_EXTENDED) != 0) { + /* Warn unable to generate match particular string. */ + pr_info("Invalid regular expression %s\n", mapcpuid); + return 1; + } + + match = !regexec(&re, cpuid, 1, pmatch, 0); + regfree(&re); + if (match) { + size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so); + + /* Verify the entire string matched. */ + if (match_len == strlen(cpuid)) + return 0; + } + return 1; +} + static char *perf_pmu__getcpuid(struct perf_pmu *pmu) { char *cpuid; @@ -610,31 +638,14 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) i = 0; for (;;) { - regex_t re; - regmatch_t pmatch[1]; - int match; - map = &pmu_events_map[i++]; if (!map->table) { map = NULL; break; } - if (regcomp(&re, map->cpuid, REG_EXTENDED) != 0) { - /* Warn unable to generate match particular string. */ - pr_info("Invalid regular expression %s\n", map->cpuid); + if (!strcmp_cpuid_str(map->cpuid, cpuid)) break; - } - - match = !regexec(&re, cpuid, 1, pmatch, 0); - regfree(&re); - if (match) { - size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so); - - /* Verify the entire string matched. */ - if (match_len == strlen(cpuid)) - break; - } } free(cpuid); return map; -- cgit v1.2.3 From b3be39c51cc58eb60c698cf64987e9d465a9263a Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 13 Feb 2018 16:14:19 +0100 Subject: perf test: Fix test case 23 for s390 z/VM or KVM guests On s390 perf can be executed on a LPAR with support for hardware events (i. e. cycles) or on a z/VM or KVM guest where no hardware events are supported. In this environment use software event named cpu-clock for this test case. Use the cpuid infrastructure functions to determine the cpuid on s390 which contains an indication of the cpu counter facility availability. 
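As a rough standalone sketch of that fallback (assuming the comma-separated cpuid format from the earlier patches and mirroring the authorization checks in the diff that follows, with simplified names), the test can pick the hardware cycles event only when the CPU-MF counter facility is authorized and otherwise use the cpu-clock software event:

#include <stdio.h>

/* Hypothetical helper, simplified from the diff below: decide between the
 * hardware "cycles" event and the "cpu-clock" software fallback based on the
 * CPU-MF authorization field at the end of the s390 cpuid string.  On z/VM
 * or KVM guests that field is missing, so we always fall back. */
static const char *pick_event(const char *cpuid, int excl_kernel)
{
	char model_c[16], model[16], cpum_cf_v[16];
	unsigned int family, cpum_cf_a;

	if (sscanf(cpuid, "%*[^,],%u,%15[^,],%15[^,],%15[^,],%x",
		   &family, model_c, model, cpum_cf_v, &cpum_cf_a) != 5)
		goto fallback;
	if (excl_kernel && (cpum_cf_a & 4))	/* same checks as the patch below */
		return "cycles:u";
	if (!excl_kernel && (cpum_cf_a & 2))
		return "cycles";
fallback:
	return excl_kernel ? "cpu-clock:u" : "cpu-clock";
}

int main(void)
{
	printf("%s\n", pick_event("IBM,3906,704,M03,3.5,002f", 0));	/* cycles */
	printf("%s\n", pick_event("IBM,2964,702,N96", 0));		/* cpu-clock */
	return 0;
}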
Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Cc: Heiko Carstens Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20180213151419.80737-4-tmricht@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/code-reading.c | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 3bf7b145b826..c7115d369511 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -482,6 +482,34 @@ static void fs_something(void) } } +static const char *do_determine_event(bool excl_kernel) +{ + const char *event = excl_kernel ? "cycles:u" : "cycles"; + +#ifdef __s390x__ + char cpuid[128], model[16], model_c[16], cpum_cf_v[16]; + unsigned int family; + int ret, cpum_cf_a; + + if (get_cpuid(cpuid, sizeof(cpuid))) + goto out_clocks; + ret = sscanf(cpuid, "%*[^,],%u,%[^,],%[^,],%[^,],%x", &family, model_c, + model, cpum_cf_v, &cpum_cf_a); + if (ret != 5) /* Not available */ + goto out_clocks; + if (excl_kernel && (cpum_cf_a & 4)) + return event; + if (!excl_kernel && (cpum_cf_a & 2)) + return event; + + /* Fall through: missing authorization */ +out_clocks: + event = excl_kernel ? "cpu-clock:u" : "cpu-clock"; + +#endif + return event; +} + static void do_something(void) { fs_something(); @@ -592,10 +620,7 @@ static int do_test_code_reading(bool try_kcore) perf_evlist__set_maps(evlist, cpus, threads); - if (excl_kernel) - str = "cycles:u"; - else - str = "cycles"; + str = do_determine_event(excl_kernel); pr_debug("Parsing event '%s'\n", str); ret = parse_events(evlist, str, NULL); if (ret < 0) { -- cgit v1.2.3 From 0f19a038afdc592176c9a302f0d08be6a68ad74a Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Wed, 14 Feb 2018 08:03:03 +0100 Subject: perf test: Fix test case inet_pton to accept inlines. Using Fedora 27 and latest Linux kernel the test case trace+probe_libc_inet_pton.sh fails again on s390. This time is the inlining of functions which does not match. After an update of the glibc (from 2.26-16 to 2.26-24) the output is different The expected output is: __inet_pton (/usr/lib64/libc-2.26.so) gaih_inet (inlined) .... The actual output is: 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.061/0.061/0.061/0.000 ms 0.000 probe_libc:inet_pton:(3ffb2140448)) __inet_pton (inlined) gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so) ... 
Fix this by being less strict on 'inlined' verses library name and accept both Signed-off-by: Thomas Richter Cc: Heiko Carstens Cc: Hendrik Brueckner Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20180214070303.55757-1-tmricht@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/trace+probe_libc_inet_pton.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh index c446c894b297..8c4ab0b390c0 100755 --- a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh @@ -21,12 +21,12 @@ trace_libc_inet_pton_backtrace() { expected[3]=".*packets transmitted.*" expected[4]="rtt min.*" expected[5]="[0-9]+\.[0-9]+[[:space:]]+probe_libc:inet_pton:\([[:xdigit:]]+\)" - expected[6]=".*inet_pton[[:space:]]\($libc\)$" + expected[6]=".*inet_pton[[:space:]]\($libc|inlined\)$" case "$(uname -m)" in s390x) eventattr='call-graph=dwarf' - expected[7]="gaih_inet[[:space:]]\(inlined\)$" - expected[8]="__GI_getaddrinfo[[:space:]]\(inlined\)$" + expected[7]="gaih_inet.*[[:space:]]\($libc|inlined\)$" + expected[8]="__GI_getaddrinfo[[:space:]]\($libc|inlined\)$" expected[9]="main[[:space:]]\(.*/bin/ping.*\)$" expected[10]="__libc_start_main[[:space:]]\($libc\)$" expected[11]="_start[[:space:]]\(.*/bin/ping.*\)$" -- cgit v1.2.3 From 21316ac6803d4a1aadd74b896db8d60a92cd1140 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 16 Feb 2018 15:26:51 -0300 Subject: perf tests shell lib: Use a wildcard to remove the vfs_getname probe In some situations the vfs_getname is being added both as requested and with a _1 suffix (inlines?): probe:vfs_getname_1 (on getname_flags:63@acme/git/linux/fs/namei.c with pathname) This ends up making the cleanup to miss that one, as it removes just 'probe:vfs_getname', which makes the second test to use this probe point to fail, since it finds that leftover from the first test, use a wildcard to remove both. Before: # perf test 60 61 62 63 60: Use vfs_getname probe to get syscall args filenames : FAILED! 61: probe libc's inet_pton & backtrace it with ping : Ok 62: Check open filename arg using perf trace + vfs_getname: FAILED! 63: Add vfs_getname probe to get syscall args filenames : Ok After: # perf test 60 61 62 63 60: Use vfs_getname probe to get syscall args filenames : Ok 61: probe libc's inet_pton & backtrace it with ping : Ok 62: Check open filename arg using perf trace + vfs_getname: Ok 63: Add vfs_getname probe to get syscall args filenames : Ok # Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-2k5kutwr4ds36adiakyb4yvy@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/lib/probe_vfs_getname.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/tests/shell/lib/probe_vfs_getname.sh b/tools/perf/tests/shell/lib/probe_vfs_getname.sh index 30a950c9d407..1c16e56cd93e 100644 --- a/tools/perf/tests/shell/lib/probe_vfs_getname.sh +++ b/tools/perf/tests/shell/lib/probe_vfs_getname.sh @@ -5,7 +5,7 @@ had_vfs_getname=$? 
cleanup_probe_vfs_getname() { if [ $had_vfs_getname -eq 1 ] ; then - perf probe -q -d probe:vfs_getname + perf probe -q -d probe:vfs_getname* fi } -- cgit v1.2.3 From 47812e00910407a0f14906c0a3bf2d803a616c6f Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 19 Feb 2018 11:24:44 +0100 Subject: perf s390: Fix reading cpuid model information Commit eca0fa28cd0d ("perf record: Provide detailed information on s390 CPU") fixed a build error on Ubuntu. However the fix uses the wrong size to print the model information. Signed-off-by: Thomas Richter Cc: Heiko Carstens Cc: Hendrik Brueckner Cc: Martin Schwidefsky Fixes: eca0fa28cd0d ("perf record: Provide detailed information on s390 CPU") Link: http://lkml.kernel.org/r/20180219102444.96900-1-tmricht@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/s390/util/header.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/arch/s390/util/header.c b/tools/perf/arch/s390/util/header.c index 231294b80dc4..a4c30f1c70be 100644 --- a/tools/perf/arch/s390/util/header.c +++ b/tools/perf/arch/s390/util/header.c @@ -81,7 +81,7 @@ int get_cpuid(char *buffer, size_t sz) line2 = line + strlen(SYSINFO_MODEL); while ((cp = strtok_r(line2, "\n ", &line2))) { - mdsize += scnprintf(model + mdsize, sizeof(type) - mdsize, + mdsize += scnprintf(model + mdsize, sizeof(model) - mdsize, "%s%s", model[0] ? "," : "", cp); } break; -- cgit v1.2.3 From 1d12cec6ce99614297e10945d917fd8a62cd2b09 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 19 Feb 2018 19:00:46 +0900 Subject: perf machine: Fix paranoid check in machine__set_kernel_mmap() machine__set_kernel_mmap() sets up the addresses of the kernel map using external info. It has a check meant to catch incorrect input, which would have both start and end addresses of 0 (i.e. machine__process_kernel_mmap_event). But we also use an end address of 0 for valid input, so change it to check both the start and end addresses. Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: David Ahern Cc: Peter Zijlstra Cc: kernel-team@lge.com Link: http://lkml.kernel.org/r/20180219101936.GD1583@sejong Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index fe27ef55cbb9..12b7427444a3 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1226,7 +1226,7 @@ static void machine__set_kernel_mmap(struct machine *machine, * Be a bit paranoid here, some perf.data file came with * a zero sized synthesized MMAP event for the kernel. */ - if (machine->vmlinux_maps[i]->end == 0) + if (start == 0 && end == 0) machine->vmlinux_maps[i]->end = ~0ULL; } } -- cgit v1.2.3 From 63cd02d84be5f7b3bc4f8fbb93cc1f871f84ae1d Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Mon, 19 Feb 2018 10:33:29 +0800 Subject: perf ftrace: Append an EOL when writing tracing files Before this change, the '--graph-funcs', '--nograph-funcs' and '--trace-funcs' options didn't work as expected when the given function doesn't exist, because the kernel side hid possible errors.
$ sudo ./perf ftrace -a --graph-depth 1 --graph-funcs abcdefg 0) 0.140 us | rcu_all_qs(); 3) 0.304 us | mutex_unlock(); 0) 0.153 us | find_vma(); 3) 0.088 us | __fsnotify_parent(); 0) 6.145 us | handle_mm_fault(); 3) 0.089 us | fsnotify(); 3) 0.161 us | __sb_end_write(); 3) 0.710 us | SyS_close(); 3) 7.848 us | exit_to_usermode_loop(); On the example above, I specified the function filter 'abcdefg' but all functions are enabled. The expected result is for all functions to be filtered, since there is no such function ('abcdefg') The original fix is to make the kernel support '\0' as end of string: https://lkml.org/lkml/2018/1/16/116 But above fix cannot be compatible with old kernels. Then Namhyung Kim suggest adding a space after function name. This patch will append an '\n' when write tracing file. After this fix, the perf will report correct error state. Also let it print an error if reset_tracing_files() fails. Committer testing: Now it prints: # perf ftrace -a --graph-depth 1 --graph-funcs abcdefg failed to set tracing filters # And for an existing function: # perf ftrace -a --graph-depth 1 --graph-funcs SyS_open 3) | SyS_open() { 3) ! 494.899 us | } 0) + 23.910 us | SyS_open(); 1) + 17.115 us | SyS_open(); 1) + 13.900 us | SyS_open(); ------------------------------------------ 3) qemu-sy-2817 => pickup-1290 ------------------------------------------ 3) + 20.021 us | SyS_open(); # Signed-off-by: Changbin Du Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1519007609-14551-1-git-send-email-changbin.du@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-ftrace.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 25a42acabee1..f42f228e8899 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -72,6 +72,7 @@ static int __write_tracing_file(const char *name, const char *val, bool append) ssize_t size = strlen(val); int flags = O_WRONLY; char errbuf[512]; + char *val_copy; file = get_tracing_file(name); if (!file) { @@ -91,12 +92,23 @@ static int __write_tracing_file(const char *name, const char *val, bool append) goto out; } - if (write(fd, val, size) == size) + /* + * Copy the original value and append a '\n'. Without this, + * the kernel can hide possible errors. 
+ */ + val_copy = strdup(val); + if (!val_copy) + goto out_close; + val_copy[size] = '\n'; + + if (write(fd, val_copy, size + 1) == size + 1) ret = 0; else pr_debug("write '%s' to tracing/%s failed: %s\n", val, name, str_error_r(errno, errbuf, sizeof(errbuf))); + free(val_copy); +out_close: close(fd); out: put_tracing_file(file); @@ -280,8 +292,10 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) signal(SIGCHLD, sig_handler); signal(SIGPIPE, sig_handler); - if (reset_tracing_files(ftrace) < 0) + if (reset_tracing_files(ftrace) < 0) { + pr_err("failed to reset ftrace\n"); goto out; + } /* reset ftrace buffer */ if (write_tracing_file("trace", "0") < 0) -- cgit v1.2.3 From d2ed5d2bdc5cd30b44dc52c44c63f08c0a31b845 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Feb 2018 12:24:13 -0300 Subject: perf python: Make twatch.py work with both python2 and python3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Will be used to test patches allowing to build perf with python3, so that we make sure that we can build with both versions. Cc: Adrian Hunter Cc: David Ahern Cc: Jaroslav Škarvada Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-c2ynv0ozr3eifzsyit6qgh3h@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/python/twatch.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/python/twatch.py b/tools/perf/python/twatch.py index c235c22b107a..0a29c5c3079f 100755 --- a/tools/perf/python/twatch.py +++ b/tools/perf/python/twatch.py @@ -42,10 +42,10 @@ def main(context_switch = 0, thread = -1): event = evlist.read_on_cpu(cpu) if not event: continue - print "cpu: %2d, pid: %4d, tid: %4d" % (event.sample_cpu, - event.sample_pid, - event.sample_tid), - print event + print("cpu: {0}, pid: {1}, tid: {2} {3}".format(event.sample_cpu, + event.sample_pid, + event.sample_tid, + event)) if __name__ == '__main__': """ -- cgit v1.2.3 From 66dfdff03d196e51322c6a85c0d8db8bb2bdd655 Mon Sep 17 00:00:00 2001 From: Jaroslav Škarvada Date: Fri, 19 Jan 2018 21:56:41 +0100 Subject: perf tools: Add Python 3 support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added Python 3 support while keeping Python 2.7 compatibility. Committer notes: This doesn't make it to auto detect python 3, one has to explicitely ask it to build with python 3 devel files, here are the instructions provided by Jaroslav: --- $ cp -a tools/perf tools/python3-perf $ make V=1 prefix=/usr -C tools/perf PYTHON=/usr/bin/python2 all $ make V=1 prefix=/usr -C tools/python3-perf PYTHON=/usr/bin/python3 all $ make V=1 prefix=/usr -C tools/python3-perf PYTHON=/usr/bin/python3 DESTDIR=%{buildroot} install-python_ext $ make V=1 prefix=/usr -C tools/perf PYTHON=/usr/bin/python2 DESTDIR=%{buildroot} install-python_ext --- We need to make this automatic, just like the existing tests for checking if the python2 devel files are in place, allowing the build with python3 if available, fallbacking to python2 and then just disabling it if none are available. 
So, using the PYTHON variable to build it using O= we get: Before this patch: $ rpm -q python3 python3-devel python3-3.6.4-7.fc27.x86_64 python3-devel-3.6.4-7.fc27.x86_64 $ rm -rf /tmp/build/perf/ ; mkdir -p /tmp/build/perf ; make O=/tmp/build/perf PYTHON=/usr/bin/python3 -C tools/perf install-bin make: Entering directory '/home/acme/git/linux/tools/perf' Makefile.config:670: Python 3 is not yet supported; please set Makefile.config:671: PYTHON and/or PYTHON_CONFIG appropriately. Makefile.config:672: If you also have Python 2 installed, then Makefile.config:673: try something like: Makefile.config:674: Makefile.config:675: make PYTHON=python2 Makefile.config:676: Makefile.config:677: Otherwise, disable Python support entirely: Makefile.config:678: Makefile.config:679: make NO_LIBPYTHON=1 Makefile.config:680: Makefile.config:681: *** . Stop. make[1]: *** [Makefile.perf:212: sub-make] Error 2 make: *** [Makefile:110: install-bin] Error 2 make: Leaving directory '/home/acme/git/linux/tools/perf' $ After: $ make O=/tmp/build/perf PYTHON=python3 -C tools/perf install-bin $ ldd ~/bin/perf | grep python libpython3.6m.so.1.0 => /lib64/libpython3.6m.so.1.0 (0x00007f58a31e8000) $ rpm -qf /lib64/libpython3.6m.so.1.0 python3-libs-3.6.4-7.fc27.x86_64 $ Now verify that when using the binding the right ELF file is loaded, using perf trace: $ perf trace -e open* perf test python 0.051 ( 0.016 ms): perf/3927 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC ) = 3 18: 'import perf' in python : 8.849 ( 0.013 ms): sh/3929 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC ) = 3 25.572 ( 0.008 ms): python3/3931 openat(dfd: CWD, filename: /tmp/build/perf/python/perf.cpython-36m-x86_64-linux-gnu.so, flags: CLOEXEC) = 3 Ok $ And using tools/perf/python/twatch.py, to show PERF_RECORD_ metaevents: $ python3 tools/perf/python/twatch.py cpu: 3, pid: 16060, tid: 16060 { type: fork, pid: 5207, ppid: 16060, tid: 5207, ptid: 16060, time: 10798513015459} cpu: 3, pid: 16060, tid: 16060 { type: fork, pid: 5208, ppid: 16060, tid: 5208, ptid: 16060, time: 10798513562503} cpu: 0, pid: 5208, tid: 5208 { type: comm, pid: 5208, tid: 5208, comm: grep } cpu: 2, pid: 5207, tid: 5207 { type: comm, pid: 5207, tid: 5207, comm: ps } cpu: 2, pid: 5207, tid: 5207 { type: exit, pid: 5207, ppid: 5207, tid: 5207, ptid: 5207, time: 10798551337484} cpu: 3, pid: 5208, tid: 5208 { type: exit, pid: 5208, ppid: 5208, tid: 5208, ptid: 5208, time: 10798551292153} cpu: 3, pid: 601, tid: 601 { type: fork, pid: 5209, ppid: 601, tid: 5209, ptid: 601, time: 10801779977324} ^CTraceback (most recent call last): File "tools/perf/python/twatch.py", line 68, in main() File "tools/perf/python/twatch.py", line 40, in main evlist.poll(timeout = -1) KeyboardInterrupt $ # ps ax|grep twatch 5197 pts/8 S+ 0:00 python3 tools/perf/python/twatch.py # ls -la /proc/5197/smaps -r--r--r--. 
1 acme acme 0 Feb 19 13:14 /proc/5197/smaps # grep python /proc/5197/smaps 558111307000-558111309000 r-xp 00000000 fd:00 3151710 /usr/bin/python3.6 558111508000-558111509000 r--p 00001000 fd:00 3151710 /usr/bin/python3.6 558111509000-55811150a000 rw-p 00002000 fd:00 3151710 /usr/bin/python3.6 7ffad6fc1000-7ffad7008000 r-xp 00000000 00:2d 220196 /tmp/build/perf/python/perf.cpython-36m-x86_64-linux-gnu.so 7ffad7008000-7ffad7207000 ---p 00047000 00:2d 220196 /tmp/build/perf/python/perf.cpython-36m-x86_64-linux-gnu.so 7ffad7207000-7ffad7208000 r--p 00046000 00:2d 220196 /tmp/build/perf/python/perf.cpython-36m-x86_64-linux-gnu.so 7ffad7208000-7ffad7215000 rw-p 00047000 00:2d 220196 /tmp/build/perf/python/perf.cpython-36m-x86_64-linux-gnu.so 7ffadea77000-7ffaded3d000 r-xp 00000000 fd:00 3151795 /usr/lib64/libpython3.6m.so.1.0 7ffaded3d000-7ffadef3c000 ---p 002c6000 fd:00 3151795 /usr/lib64/libpython3.6m.so.1.0 7ffadef3c000-7ffadef42000 r--p 002c5000 fd:00 3151795 /usr/lib64/libpython3.6m.so.1.0 7ffadef42000-7ffadefa5000 rw-p 002cb000 fd:00 3151795 /usr/lib64/libpython3.6m.so.1.0 # And with this patch, but building normally, without specifying the PYTHON=python3 part, which will make it use python2 if its devel files are available, like in this test: $ make O=/tmp/build/perf -C tools/perf install-bin $ ldd ~/bin/perf | grep python libpython2.7.so.1.0 => /lib64/libpython2.7.so.1.0 (0x00007f6a44410000) $ ldd /tmp/build/perf/python_ext_build/lib/perf.so | grep python libpython2.7.so.1.0 => /lib64/libpython2.7.so.1.0 (0x00007fed28a2c000) $ [acme@jouet perf]$ tools/perf/python/twatch.py cpu: 0, pid: 2817, tid: 2817 { type: fork, pid: 2817, ppid: 2817, tid: 8910, ptid: 2817, time: 11126454335306} cpu: 0, pid: 2817, tid: 2817 { type: comm, pid: 2817, tid: 8910, comm: worker } $ ps ax | grep twatch.py 8909 pts/8 S+ 0:00 /usr/bin/python tools/perf/python/twatch.py $ grep python /proc/8909/smaps 5579de658000-5579de659000 r-xp 00000000 fd:00 3156044 /usr/bin/python2.7 5579de858000-5579de859000 r--p 00000000 fd:00 3156044 /usr/bin/python2.7 5579de859000-5579de85a000 rw-p 00001000 fd:00 3156044 /usr/bin/python2.7 7f0de01f7000-7f0de023e000 r-xp 00000000 00:2d 230695 /tmp/build/perf/python/perf.so 7f0de023e000-7f0de043d000 ---p 00047000 00:2d 230695 /tmp/build/perf/python/perf.so 7f0de043d000-7f0de043e000 r--p 00046000 00:2d 230695 /tmp/build/perf/python/perf.so 7f0de043e000-7f0de044b000 rw-p 00047000 00:2d 230695 /tmp/build/perf/python/perf.so 7f0de6f0f000-7f0de6f13000 r-xp 00000000 fd:00 134975 /usr/lib64/python2.7/lib-dynload/_localemodule.so 7f0de6f13000-7f0de7113000 ---p 00004000 fd:00 134975 /usr/lib64/python2.7/lib-dynload/_localemodule.so 7f0de7113000-7f0de7114000 r--p 00004000 fd:00 134975 /usr/lib64/python2.7/lib-dynload/_localemodule.so 7f0de7114000-7f0de7115000 rw-p 00005000 fd:00 134975 /usr/lib64/python2.7/lib-dynload/_localemodule.so 7f0de7e73000-7f0de8052000 r-xp 00000000 fd:00 3173292 /usr/lib64/libpython2.7.so.1.0 7f0de8052000-7f0de8251000 ---p 001df000 fd:00 3173292 /usr/lib64/libpython2.7.so.1.0 7f0de8251000-7f0de8255000 r--p 001de000 fd:00 3173292 /usr/lib64/libpython2.7.so.1.0 7f0de8255000-7f0de8291000 rw-p 001e2000 fd:00 3173292 /usr/lib64/libpython2.7.so.1.0 $ Signed-off-by: Jaroslav Škarvada Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan LPU-Reference: 20180119205641.24242-1-jskarvad@redhat.com Link: https://lkml.kernel.org/n/tip-8d7dt9kqp83vsz25hagug8fu@git.kernel.org [ Removed explicit check for python version, 
allowing it to really build with python3 ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 23 +--- tools/perf/Makefile.perf | 4 +- .../perf/scripts/python/Perf-Trace-Util/Context.c | 34 ++++- tools/perf/util/python.c | 95 ++++++++++--- .../util/scripting-engines/trace-event-python.c | 147 +++++++++++++++------ tools/perf/util/setup.py | 6 +- 6 files changed, 221 insertions(+), 88 deletions(-) (limited to 'tools') diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 577a5d2988fe..89cb2a36b8ff 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -668,25 +668,10 @@ else ifneq ($(feature-libpython), 1) $(call disable-python,No 'Python.h' (for Python 2.x support) was found: disables Python support - please install python-devel/python-dev) else - ifneq ($(feature-libpython-version), 1) - $(warning Python 3 is not yet supported; please set) - $(warning PYTHON and/or PYTHON_CONFIG appropriately.) - $(warning If you also have Python 2 installed, then) - $(warning try something like:) - $(warning $(and ,)) - $(warning $(and ,) make PYTHON=python2) - $(warning $(and ,)) - $(warning Otherwise, disable Python support entirely:) - $(warning $(and ,)) - $(warning $(and ,) make NO_LIBPYTHON=1) - $(warning $(and ,)) - $(error $(and ,)) - else - LDFLAGS += $(PYTHON_EMBED_LDFLAGS) - EXTLIBS += $(PYTHON_EMBED_LIBADD) - LANG_BINDINGS += $(obj-perf)python/perf.so - $(call detected,CONFIG_LIBPYTHON) - endif + LDFLAGS += $(PYTHON_EMBED_LDFLAGS) + EXTLIBS += $(PYTHON_EMBED_LIBADD) + LANG_BINDINGS += $(obj-perf)python/perf.so + $(call detected,CONFIG_LIBPYTHON) endif endif endif diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 9b0351d3ce34..126ef6e0475c 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -302,7 +302,7 @@ PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/ PYTHON_EXTBUILD_TMP := $(PYTHON_EXTBUILD)tmp/ export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP -python-clean := $(call QUIET_CLEAN, python) $(RM) -r $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so +python-clean := $(call QUIET_CLEAN, python) $(RM) -r $(PYTHON_EXTBUILD) $(OUTPUT)python/perf*.so PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources) PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPI) @@ -479,7 +479,7 @@ $(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_D $(PYTHON_WORD) util/setup.py \ --quiet build_ext; \ mkdir -p $(OUTPUT)python && \ - cp $(PYTHON_EXTBUILD_LIB)perf.so $(OUTPUT)python/ + cp $(PYTHON_EXTBUILD_LIB)perf*.so $(OUTPUT)python/ please_set_SHELL_PATH_to_a_more_modern_shell: $(Q)$$(:) diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Context.c b/tools/perf/scripts/python/Perf-Trace-Util/Context.c index fcd1dd667906..1a0d27757eec 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/Context.c +++ b/tools/perf/scripts/python/Perf-Trace-Util/Context.c @@ -23,7 +23,17 @@ #include "../../../perf.h" #include "../../../util/trace-event.h" +#if PY_MAJOR_VERSION < 3 +#define _PyCapsule_GetPointer(arg1, arg2) \ + PyCObject_AsVoidPtr(arg1) + PyMODINIT_FUNC initperf_trace_context(void); +#else +#define _PyCapsule_GetPointer(arg1, arg2) \ + PyCapsule_GetPointer((arg1), (arg2)) + +PyMODINIT_FUNC PyInit_perf_trace_context(void); +#endif static PyObject *perf_trace_context_common_pc(PyObject *obj, PyObject *args) { @@ -34,7 +44,7 @@ static PyObject *perf_trace_context_common_pc(PyObject *obj, PyObject *args) if (!PyArg_ParseTuple(args, "O", &context)) return NULL; - 
scripting_context = PyCObject_AsVoidPtr(context); + scripting_context = _PyCapsule_GetPointer(context, NULL); retval = common_pc(scripting_context); return Py_BuildValue("i", retval); @@ -50,7 +60,7 @@ static PyObject *perf_trace_context_common_flags(PyObject *obj, if (!PyArg_ParseTuple(args, "O", &context)) return NULL; - scripting_context = PyCObject_AsVoidPtr(context); + scripting_context = _PyCapsule_GetPointer(context, NULL); retval = common_flags(scripting_context); return Py_BuildValue("i", retval); @@ -66,7 +76,7 @@ static PyObject *perf_trace_context_common_lock_depth(PyObject *obj, if (!PyArg_ParseTuple(args, "O", &context)) return NULL; - scripting_context = PyCObject_AsVoidPtr(context); + scripting_context = _PyCapsule_GetPointer(context, NULL); retval = common_lock_depth(scripting_context); return Py_BuildValue("i", retval); @@ -82,7 +92,25 @@ static PyMethodDef ContextMethods[] = { { NULL, NULL, 0, NULL} }; +#if PY_MAJOR_VERSION < 3 PyMODINIT_FUNC initperf_trace_context(void) { (void) Py_InitModule("perf_trace_context", ContextMethods); } +#else +PyMODINIT_FUNC PyInit_perf_trace_context(void) +{ + static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "perf_trace_context", /* m_name */ + "", /* m_doc */ + -1, /* m_size */ + ContextMethods, /* m_methods */ + NULL, /* m_reload */ + NULL, /* m_traverse */ + NULL, /* m_clear */ + NULL, /* m_free */ + }; + return PyModule_Create(&moduledef); +} +#endif diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index b1e999bd21ef..2918cac7a142 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -12,6 +12,30 @@ #include "print_binary.h" #include "thread_map.h" +#if PY_MAJOR_VERSION < 3 +#define _PyUnicode_FromString(arg) \ + PyString_FromString(arg) +#define _PyUnicode_AsString(arg) \ + PyString_AsString(arg) +#define _PyUnicode_FromFormat(...) \ + PyString_FromFormat(__VA_ARGS__) +#define _PyLong_FromLong(arg) \ + PyInt_FromLong(arg) + +#else + +#define _PyUnicode_FromString(arg) \ + PyUnicode_FromString(arg) +#define _PyUnicode_FromFormat(...) \ + PyUnicode_FromFormat(__VA_ARGS__) +#define _PyLong_FromLong(arg) \ + PyLong_FromLong(arg) +#endif + +#ifndef Py_TYPE +#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) +#endif + /* * Provide these two so that we don't have to link against callchain.c and * start dragging hist.c, etc. @@ -49,7 +73,11 @@ int eprintf(int level, int var, const char *fmt, ...) # define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size, #endif +#if PY_MAJOR_VERSION < 3 PyMODINIT_FUNC initperf(void); +#else +PyMODINIT_FUNC PyInit_perf(void); +#endif #define member_def(type, member, ptype, help) \ { #member, ptype, \ @@ -107,7 +135,7 @@ static PyObject *pyrf_mmap_event__repr(struct pyrf_event *pevent) pevent->event.mmap.pgoff, pevent->event.mmap.filename) < 0) { ret = PyErr_NoMemory(); } else { - ret = PyString_FromString(s); + ret = _PyUnicode_FromString(s); free(s); } return ret; @@ -138,7 +166,7 @@ static PyMemberDef pyrf_task_event__members[] = { static PyObject *pyrf_task_event__repr(struct pyrf_event *pevent) { - return PyString_FromFormat("{ type: %s, pid: %u, ppid: %u, tid: %u, " + return _PyUnicode_FromFormat("{ type: %s, pid: %u, ppid: %u, tid: %u, " "ptid: %u, time: %" PRIu64 "}", pevent->event.header.type == PERF_RECORD_FORK ? 
"fork" : "exit", pevent->event.fork.pid, @@ -171,7 +199,7 @@ static PyMemberDef pyrf_comm_event__members[] = { static PyObject *pyrf_comm_event__repr(struct pyrf_event *pevent) { - return PyString_FromFormat("{ type: comm, pid: %u, tid: %u, comm: %s }", + return _PyUnicode_FromFormat("{ type: comm, pid: %u, tid: %u, comm: %s }", pevent->event.comm.pid, pevent->event.comm.tid, pevent->event.comm.comm); @@ -202,7 +230,7 @@ static PyObject *pyrf_throttle_event__repr(struct pyrf_event *pevent) { struct throttle_event *te = (struct throttle_event *)(&pevent->event.header + 1); - return PyString_FromFormat("{ type: %sthrottle, time: %" PRIu64 ", id: %" PRIu64 + return _PyUnicode_FromFormat("{ type: %sthrottle, time: %" PRIu64 ", id: %" PRIu64 ", stream_id: %" PRIu64 " }", pevent->event.header.type == PERF_RECORD_THROTTLE ? "" : "un", te->time, te->id, te->stream_id); @@ -237,7 +265,7 @@ static PyObject *pyrf_lost_event__repr(struct pyrf_event *pevent) pevent->event.lost.id, pevent->event.lost.lost) < 0) { ret = PyErr_NoMemory(); } else { - ret = PyString_FromString(s); + ret = _PyUnicode_FromString(s); free(s); } return ret; @@ -264,7 +292,7 @@ static PyMemberDef pyrf_read_event__members[] = { static PyObject *pyrf_read_event__repr(struct pyrf_event *pevent) { - return PyString_FromFormat("{ type: read, pid: %u, tid: %u }", + return _PyUnicode_FromFormat("{ type: read, pid: %u, tid: %u }", pevent->event.read.pid, pevent->event.read.tid); /* @@ -299,7 +327,7 @@ static PyObject *pyrf_sample_event__repr(struct pyrf_event *pevent) if (asprintf(&s, "{ type: sample }") < 0) { ret = PyErr_NoMemory(); } else { - ret = PyString_FromString(s); + ret = _PyUnicode_FromString(s); free(s); } return ret; @@ -330,7 +358,7 @@ tracepoint_field(struct pyrf_event *pe, struct format_field *field) } if (field->flags & FIELD_IS_STRING && is_printable_array(data + offset, len)) { - ret = PyString_FromString((char *)data + offset); + ret = _PyUnicode_FromString((char *)data + offset); } else { ret = PyByteArray_FromStringAndSize((const char *) data + offset, len); field->flags &= ~FIELD_IS_STRING; @@ -352,7 +380,7 @@ tracepoint_field(struct pyrf_event *pe, struct format_field *field) static PyObject* get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name) { - const char *str = PyString_AsString(PyObject_Str(attr_name)); + const char *str = _PyUnicode_AsString(PyObject_Str(attr_name)); struct perf_evsel *evsel = pevent->evsel; struct format_field *field; @@ -416,7 +444,7 @@ static PyObject *pyrf_context_switch_event__repr(struct pyrf_event *pevent) !!(pevent->event.header.misc & PERF_RECORD_MISC_SWITCH_OUT)) < 0) { ret = PyErr_NoMemory(); } else { - ret = PyString_FromString(s); + ret = _PyUnicode_FromString(s); free(s); } return ret; @@ -528,7 +556,7 @@ static int pyrf_cpu_map__init(struct pyrf_cpu_map *pcpus, static void pyrf_cpu_map__delete(struct pyrf_cpu_map *pcpus) { cpu_map__put(pcpus->cpus); - pcpus->ob_type->tp_free((PyObject*)pcpus); + Py_TYPE(pcpus)->tp_free((PyObject*)pcpus); } static Py_ssize_t pyrf_cpu_map__length(PyObject *obj) @@ -597,7 +625,7 @@ static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads, static void pyrf_thread_map__delete(struct pyrf_thread_map *pthreads) { thread_map__put(pthreads->threads); - pthreads->ob_type->tp_free((PyObject*)pthreads); + Py_TYPE(pthreads)->tp_free((PyObject*)pthreads); } static Py_ssize_t pyrf_thread_map__length(PyObject *obj) @@ -759,7 +787,7 @@ static int pyrf_evsel__init(struct pyrf_evsel *pevsel, static void pyrf_evsel__delete(struct 
pyrf_evsel *pevsel) { perf_evsel__exit(&pevsel->evsel); - pevsel->ob_type->tp_free((PyObject*)pevsel); + Py_TYPE(pevsel)->tp_free((PyObject*)pevsel); } static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel, @@ -850,7 +878,7 @@ static int pyrf_evlist__init(struct pyrf_evlist *pevlist, static void pyrf_evlist__delete(struct pyrf_evlist *pevlist) { perf_evlist__exit(&pevlist->evlist); - pevlist->ob_type->tp_free((PyObject*)pevlist); + Py_TYPE(pevlist)->tp_free((PyObject*)pevlist); } static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist, @@ -902,12 +930,16 @@ static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist, for (i = 0; i < evlist->pollfd.nr; ++i) { PyObject *file; +#if PY_MAJOR_VERSION < 3 FILE *fp = fdopen(evlist->pollfd.entries[i].fd, "r"); if (fp == NULL) goto free_list; file = PyFile_FromFile(fp, "perf", "r", NULL); +#else + file = PyFile_FromFd(evlist->pollfd.entries[i].fd, "perf", "r", -1, NULL, NULL, NULL, 1); +#endif if (file == NULL) goto free_list; @@ -1194,9 +1226,9 @@ static PyObject *pyrf__tracepoint(struct pyrf_evsel *pevsel, tp_format = trace_event__tp_format(sys, name); if (IS_ERR(tp_format)) - return PyInt_FromLong(-1); + return _PyLong_FromLong(-1); - return PyInt_FromLong(tp_format->id); + return _PyLong_FromLong(tp_format->id); } static PyMethodDef perf__methods[] = { @@ -1209,11 +1241,31 @@ static PyMethodDef perf__methods[] = { { .ml_name = NULL, } }; +#if PY_MAJOR_VERSION < 3 PyMODINIT_FUNC initperf(void) +#else +PyMODINIT_FUNC PyInit_perf(void) +#endif { PyObject *obj; int i; - PyObject *dict, *module = Py_InitModule("perf", perf__methods); + PyObject *dict; +#if PY_MAJOR_VERSION < 3 + PyObject *module = Py_InitModule("perf", perf__methods); +#else + static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "perf", /* m_name */ + "", /* m_doc */ + -1, /* m_size */ + perf__methods, /* m_methods */ + NULL, /* m_reload */ + NULL, /* m_traverse */ + NULL, /* m_clear */ + NULL, /* m_free */ + }; + PyObject *module = PyModule_Create(&moduledef); +#endif if (module == NULL || pyrf_event__setup_types() < 0 || @@ -1221,7 +1273,11 @@ PyMODINIT_FUNC initperf(void) pyrf_evsel__setup_types() < 0 || pyrf_thread_map__setup_types() < 0 || pyrf_cpu_map__setup_types() < 0) +#if PY_MAJOR_VERSION < 3 return; +#else + return module; +#endif /* The page_size is placed in util object. 
*/ page_size = sysconf(_SC_PAGE_SIZE); @@ -1270,7 +1326,7 @@ PyMODINIT_FUNC initperf(void) goto error; for (i = 0; perf__constants[i].name != NULL; i++) { - obj = PyInt_FromLong(perf__constants[i].value); + obj = _PyLong_FromLong(perf__constants[i].value); if (obj == NULL) goto error; PyDict_SetItemString(dict, perf__constants[i].name, obj); @@ -1280,6 +1336,9 @@ PyMODINIT_FUNC initperf(void) error: if (PyErr_Occurred()) PyErr_SetString(PyExc_ImportError, "perf: Init failed!"); +#if PY_MAJOR_VERSION >= 3 + return module; +#endif } /* diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index ea070883c593..10dd5fce082b 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -49,7 +49,37 @@ #include "print_binary.h" #include "stat.h" +#if PY_MAJOR_VERSION < 3 +#define _PyUnicode_FromString(arg) \ + PyString_FromString(arg) +#define _PyUnicode_FromStringAndSize(arg1, arg2) \ + PyString_FromStringAndSize((arg1), (arg2)) +#define _PyBytes_FromStringAndSize(arg1, arg2) \ + PyString_FromStringAndSize((arg1), (arg2)) +#define _PyLong_FromLong(arg) \ + PyInt_FromLong(arg) +#define _PyLong_AsLong(arg) \ + PyInt_AsLong(arg) +#define _PyCapsule_New(arg1, arg2, arg3) \ + PyCObject_FromVoidPtr((arg1), (arg2)) + PyMODINIT_FUNC initperf_trace_context(void); +#else +#define _PyUnicode_FromString(arg) \ + PyUnicode_FromString(arg) +#define _PyUnicode_FromStringAndSize(arg1, arg2) \ + PyUnicode_FromStringAndSize((arg1), (arg2)) +#define _PyBytes_FromStringAndSize(arg1, arg2) \ + PyBytes_FromStringAndSize((arg1), (arg2)) +#define _PyLong_FromLong(arg) \ + PyLong_FromLong(arg) +#define _PyLong_AsLong(arg) \ + PyLong_AsLong(arg) +#define _PyCapsule_New(arg1, arg2, arg3) \ + PyCapsule_New((arg1), (arg2), (arg3)) + +PyMODINIT_FUNC PyInit_perf_trace_context(void); +#endif #define TRACE_EVENT_TYPE_MAX \ ((1 << (sizeof(unsigned short) * 8)) - 1) @@ -135,7 +165,7 @@ static int get_argument_count(PyObject *handler) PyObject *arg_count_obj = PyObject_GetAttrString(code_obj, "co_argcount"); if (arg_count_obj) { - arg_count = (int) PyInt_AsLong(arg_count_obj); + arg_count = (int) _PyLong_AsLong(arg_count_obj); Py_DECREF(arg_count_obj); } Py_DECREF(code_obj); @@ -182,10 +212,10 @@ static void define_value(enum print_arg_type field_type, value = eval_flag(field_value); - PyTuple_SetItem(t, n++, PyString_FromString(ev_name)); - PyTuple_SetItem(t, n++, PyString_FromString(field_name)); - PyTuple_SetItem(t, n++, PyInt_FromLong(value)); - PyTuple_SetItem(t, n++, PyString_FromString(field_str)); + PyTuple_SetItem(t, n++, _PyUnicode_FromString(ev_name)); + PyTuple_SetItem(t, n++, _PyUnicode_FromString(field_name)); + PyTuple_SetItem(t, n++, _PyLong_FromLong(value)); + PyTuple_SetItem(t, n++, _PyUnicode_FromString(field_str)); try_call_object(handler_name, t); @@ -223,10 +253,10 @@ static void define_field(enum print_arg_type field_type, if (!t) Py_FatalError("couldn't create Python tuple"); - PyTuple_SetItem(t, n++, PyString_FromString(ev_name)); - PyTuple_SetItem(t, n++, PyString_FromString(field_name)); + PyTuple_SetItem(t, n++, _PyUnicode_FromString(ev_name)); + PyTuple_SetItem(t, n++, _PyUnicode_FromString(field_name)); if (field_type == PRINT_FLAGS) - PyTuple_SetItem(t, n++, PyString_FromString(delim)); + PyTuple_SetItem(t, n++, _PyUnicode_FromString(delim)); try_call_object(handler_name, t); @@ -325,12 +355,12 @@ static PyObject *get_field_numeric_entry(struct event_format 
*event, if (field->flags & FIELD_IS_SIGNED) { if ((long long)val >= LONG_MIN && (long long)val <= LONG_MAX) - obj = PyInt_FromLong(val); + obj = _PyLong_FromLong(val); else obj = PyLong_FromLongLong(val); } else { if (val <= LONG_MAX) - obj = PyInt_FromLong(val); + obj = _PyLong_FromLong(val); else obj = PyLong_FromUnsignedLongLong(val); } @@ -389,9 +419,9 @@ static PyObject *python_process_callchain(struct perf_sample *sample, pydict_set_item_string_decref(pysym, "end", PyLong_FromUnsignedLongLong(node->sym->end)); pydict_set_item_string_decref(pysym, "binding", - PyInt_FromLong(node->sym->binding)); + _PyLong_FromLong(node->sym->binding)); pydict_set_item_string_decref(pysym, "name", - PyString_FromStringAndSize(node->sym->name, + _PyUnicode_FromStringAndSize(node->sym->name, node->sym->namelen)); pydict_set_item_string_decref(pyelem, "sym", pysym); } @@ -406,7 +436,7 @@ static PyObject *python_process_callchain(struct perf_sample *sample, dsoname = map->dso->name; } pydict_set_item_string_decref(pyelem, "dso", - PyString_FromString(dsoname)); + _PyUnicode_FromString(dsoname)); } callchain_cursor_advance(&callchain_cursor); @@ -483,16 +513,16 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, if (!dict_sample) Py_FatalError("couldn't create Python dictionary"); - pydict_set_item_string_decref(dict, "ev_name", PyString_FromString(perf_evsel__name(evsel))); - pydict_set_item_string_decref(dict, "attr", PyString_FromStringAndSize( + pydict_set_item_string_decref(dict, "ev_name", _PyUnicode_FromString(perf_evsel__name(evsel))); + pydict_set_item_string_decref(dict, "attr", _PyUnicode_FromStringAndSize( (const char *)&evsel->attr, sizeof(evsel->attr))); pydict_set_item_string_decref(dict_sample, "pid", - PyInt_FromLong(sample->pid)); + _PyLong_FromLong(sample->pid)); pydict_set_item_string_decref(dict_sample, "tid", - PyInt_FromLong(sample->tid)); + _PyLong_FromLong(sample->tid)); pydict_set_item_string_decref(dict_sample, "cpu", - PyInt_FromLong(sample->cpu)); + _PyLong_FromLong(sample->cpu)); pydict_set_item_string_decref(dict_sample, "ip", PyLong_FromUnsignedLongLong(sample->ip)); pydict_set_item_string_decref(dict_sample, "time", @@ -504,17 +534,17 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, set_sample_read_in_dict(dict_sample, sample, evsel); pydict_set_item_string_decref(dict, "sample", dict_sample); - pydict_set_item_string_decref(dict, "raw_buf", PyString_FromStringAndSize( + pydict_set_item_string_decref(dict, "raw_buf", _PyBytes_FromStringAndSize( (const char *)sample->raw_data, sample->raw_size)); pydict_set_item_string_decref(dict, "comm", - PyString_FromString(thread__comm_str(al->thread))); + _PyUnicode_FromString(thread__comm_str(al->thread))); if (al->map) { pydict_set_item_string_decref(dict, "dso", - PyString_FromString(al->map->dso->name)); + _PyUnicode_FromString(al->map->dso->name)); } if (al->sym) { pydict_set_item_string_decref(dict, "symbol", - PyString_FromString(al->sym->name)); + _PyUnicode_FromString(al->sym->name)); } pydict_set_item_string_decref(dict, "callchain", callchain); @@ -574,9 +604,9 @@ static void python_process_tracepoint(struct perf_sample *sample, scripting_context->event_data = data; scripting_context->pevent = evsel->tp_format->pevent; - context = PyCObject_FromVoidPtr(scripting_context, NULL); + context = _PyCapsule_New(scripting_context, NULL, NULL); - PyTuple_SetItem(t, n++, PyString_FromString(handler_name)); + PyTuple_SetItem(t, n++, _PyUnicode_FromString(handler_name)); PyTuple_SetItem(t, n++, 
context); /* ip unwinding */ @@ -585,18 +615,18 @@ static void python_process_tracepoint(struct perf_sample *sample, Py_INCREF(callchain); if (!dict) { - PyTuple_SetItem(t, n++, PyInt_FromLong(cpu)); - PyTuple_SetItem(t, n++, PyInt_FromLong(s)); - PyTuple_SetItem(t, n++, PyInt_FromLong(ns)); - PyTuple_SetItem(t, n++, PyInt_FromLong(pid)); - PyTuple_SetItem(t, n++, PyString_FromString(comm)); + PyTuple_SetItem(t, n++, _PyLong_FromLong(cpu)); + PyTuple_SetItem(t, n++, _PyLong_FromLong(s)); + PyTuple_SetItem(t, n++, _PyLong_FromLong(ns)); + PyTuple_SetItem(t, n++, _PyLong_FromLong(pid)); + PyTuple_SetItem(t, n++, _PyUnicode_FromString(comm)); PyTuple_SetItem(t, n++, callchain); } else { - pydict_set_item_string_decref(dict, "common_cpu", PyInt_FromLong(cpu)); - pydict_set_item_string_decref(dict, "common_s", PyInt_FromLong(s)); - pydict_set_item_string_decref(dict, "common_ns", PyInt_FromLong(ns)); - pydict_set_item_string_decref(dict, "common_pid", PyInt_FromLong(pid)); - pydict_set_item_string_decref(dict, "common_comm", PyString_FromString(comm)); + pydict_set_item_string_decref(dict, "common_cpu", _PyLong_FromLong(cpu)); + pydict_set_item_string_decref(dict, "common_s", _PyLong_FromLong(s)); + pydict_set_item_string_decref(dict, "common_ns", _PyLong_FromLong(ns)); + pydict_set_item_string_decref(dict, "common_pid", _PyLong_FromLong(pid)); + pydict_set_item_string_decref(dict, "common_comm", _PyUnicode_FromString(comm)); pydict_set_item_string_decref(dict, "common_callchain", callchain); } for (field = event->format.fields; field; field = field->next) { @@ -615,7 +645,7 @@ static void python_process_tracepoint(struct perf_sample *sample, } if (field->flags & FIELD_IS_STRING && is_printable_array(data + offset, len)) { - obj = PyString_FromString((char *) data + offset); + obj = _PyUnicode_FromString((char *) data + offset); } else { obj = PyByteArray_FromStringAndSize((const char *) data + offset, len); field->flags &= ~FIELD_IS_STRING; @@ -668,7 +698,7 @@ static PyObject *tuple_new(unsigned int sz) static int tuple_set_u64(PyObject *t, unsigned int pos, u64 val) { #if BITS_PER_LONG == 64 - return PyTuple_SetItem(t, pos, PyInt_FromLong(val)); + return PyTuple_SetItem(t, pos, _PyLong_FromLong(val)); #endif #if BITS_PER_LONG == 32 return PyTuple_SetItem(t, pos, PyLong_FromLongLong(val)); @@ -677,12 +707,12 @@ static int tuple_set_u64(PyObject *t, unsigned int pos, u64 val) static int tuple_set_s32(PyObject *t, unsigned int pos, s32 val) { - return PyTuple_SetItem(t, pos, PyInt_FromLong(val)); + return PyTuple_SetItem(t, pos, _PyLong_FromLong(val)); } static int tuple_set_string(PyObject *t, unsigned int pos, const char *s) { - return PyTuple_SetItem(t, pos, PyString_FromString(s)); + return PyTuple_SetItem(t, pos, _PyUnicode_FromString(s)); } static int python_export_evsel(struct db_export *dbe, struct perf_evsel *evsel) @@ -1029,8 +1059,8 @@ process_stat(struct perf_evsel *counter, int cpu, int thread, u64 tstamp, return; } - PyTuple_SetItem(t, n++, PyInt_FromLong(cpu)); - PyTuple_SetItem(t, n++, PyInt_FromLong(thread)); + PyTuple_SetItem(t, n++, _PyLong_FromLong(cpu)); + PyTuple_SetItem(t, n++, _PyLong_FromLong(thread)); tuple_set_u64(t, n++, tstamp); tuple_set_u64(t, n++, count->val); @@ -1212,27 +1242,58 @@ static void set_table_handlers(struct tables *tables) SET_TABLE_HANDLER(call_return); } +#if PY_MAJOR_VERSION < 3 +static void _free_command_line(const char **command_line, int num) +{ + free(command_line); +} +#else +static void _free_command_line(wchar_t **command_line, int num) +{ 
+	int i;
+	for (i = 0; i < num; i++)
+		PyMem_RawFree(command_line[i]);
+	free(command_line);
+}
+#endif
+
+
 /*
  * Start trace script
  */
 static int python_start_script(const char *script, int argc, const char **argv)
 {
 	struct tables *tables = &tables_global;
+#if PY_MAJOR_VERSION < 3
 	const char **command_line;
+#else
+	wchar_t **command_line;
+#endif
 	char buf[PATH_MAX];
 	int i, err = 0;
 	FILE *fp;
 
+#if PY_MAJOR_VERSION < 3
 	command_line = malloc((argc + 1) * sizeof(const char *));
 	command_line[0] = script;
 	for (i = 1; i < argc + 1; i++)
 		command_line[i] = argv[i - 1];
+#else
+	command_line = malloc((argc + 1) * sizeof(wchar_t *));
+	command_line[0] = Py_DecodeLocale(script, NULL);
+	for (i = 1; i < argc + 1; i++)
+		command_line[i] = Py_DecodeLocale(argv[i - 1], NULL);
+#endif
 
 	Py_Initialize();
 
+#if PY_MAJOR_VERSION < 3
 	initperf_trace_context();
-	PySys_SetArgv(argc + 1, (char **)command_line);
+#else
+	PyInit_perf_trace_context();
+	PySys_SetArgv(argc + 1, command_line);
+#endif
 
 	fp = fopen(script, "r");
 	if (!fp) {
@@ -1262,12 +1323,12 @@ static int python_start_script(const char *script, int argc, const char **argv)
 		goto error;
 	}
 
-	free(command_line);
+	_free_command_line(command_line, argc + 1);
 
 	return err;
 error:
 	Py_Finalize();
-	free(command_line);
+	_free_command_line(command_line, argc + 1);
 	return err;
 }
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index af415febbc46..6891635b50c3 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python2
+#!/usr/bin/python
 
 from os import getenv
 
@@ -35,11 +35,11 @@ build_tmp = getenv('PYTHON_EXTBUILD_TMP')
 libtraceevent = getenv('LIBTRACEEVENT')
 libapikfs = getenv('LIBAPI')
 
-ext_sources = [f.strip() for f in file('util/python-ext-sources')
+ext_sources = [f.strip() for f in open('util/python-ext-sources')
 		if len(f.strip()) > 0 and f[0] != '#']
 
 # use full paths with source files
-ext_sources = map(lambda x: '%s/%s' % (src_perf, x) , ext_sources)
+ext_sources = list(map(lambda x: '%s/%s' % (src_perf, x) , ext_sources))
 
 perf = Extension('perf',
 		  sources = ext_sources,
-- cgit v1.2.3


From de7112868829b3286def38297848d5d2592b4a70 Mon Sep 17 00:00:00 2001
From: Sangwon Hong
Date: Mon, 12 Feb 2018 04:37:44 +0900
Subject: perf kallsyms: Fix the usage on the man page

First, all man pages highlight only perf and the subcommands, except
'perf kallsyms', which highlights the full usage. Fix it so that only
the commands are underlined.

Second, options can be omitted when executing 'perf kallsyms', so add
square brackets between