summaryrefslogtreecommitdiffstats
path: root/tools/perf/util/demangle-rust.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-07-25 13:20:41 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2016-07-25 13:20:41 -0700
commit7e4dc77b2869a683fc43c0394fca5441816390ba (patch)
tree62e734c599bc1da2712fdb63be996622c415a83a /tools/perf/util/demangle-rust.c
parent89e7eb098adfe342bc036f00201eb579d448f033 (diff)
parent5048c2af078d5976895d521262a8802ea791f3b0 (diff)
downloadlinux-7e4dc77b2869a683fc43c0394fca5441816390ba.tar.bz2
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar: "With over 300 commits it's been a busy cycle - with most of the work concentrated on the tooling side (as it should). The main kernel side enhancements were: - Add per event callchain limit: Recently we introduced a sysctl to tune the max-stack for all events for which callchains were requested: $ sysctl kernel.perf_event_max_stack kernel.perf_event_max_stack = 127 Now this patch introduces a way to configure this per event, i.e. this becomes possible: $ perf record -e sched:*/max-stack=2/ -e block:*/max-stack=10/ -a allowing finer tuning of how much buffer space callchains use. This uses an u16 from the reserved space at the end, leaving another u16 for future use. There has been interest in even finer tuning, namely to control the max stack for kernel and userspace callchains separately. Further discussion is needed, we may for instance use the remaining u16 for that and when it is present, assume that the sample_max_stack introduced in this patch applies for the kernel, and the u16 left is used for limiting the userspace callchain (Arnaldo Carvalho de Melo) - Optimize AUX event (hardware assisted side-band event) delivery (Kan Liang) - Rework Intel family name macro usage (this is partially x86 arch work) (Dave Hansen) - Refine and fix Intel LBR support (David Carrillo-Cisneros) - Add support for Intel 'TopDown' events (Andi Kleen) - Intel uncore PMU driver fixes and enhancements (Kan Liang) - ... other misc changes. Here's an incomplete list of the tooling enhancements (but there's much more, see the shortlog and the git log for details): - Support cross unwinding, i.e. collecting '--call-graph dwarf' perf.data files in one machine and then doing analysis in another machine of a different hardware architecture. This enables, for instance, to do: $ perf record -a --call-graph dwarf on a x86-32 or aarch64 system and then do 'perf report' on it on a x86_64 workstation (He Kuang) - Allow reading from a backward ring buffer (one setup via sys_perf_event_open() with perf_event_attr.write_backward = 1) (Wang Nan) - Finish merging initial SDT (Statically Defined Traces) support, see cset comments for details about how it all works (Masami Hiramatsu) - Support attaching eBPF programs to tracepoints (Wang Nan) - Add demangling of symbols in programs written in the Rust language (David Tolnay) - Add support for tracepoints in the python binding, including an example, that sets up and parses sched:sched_switch events, tools/perf/python/tracepoint.py (Jiri Olsa) - Introduce --stdio-color to set up the color output mode selection in 'annotate' and 'report', allowing emit color escape sequences when redirecting the output of these tools (Arnaldo Carvalho de Melo) - Add 'callindent' option to 'perf script -F', to indent the Intel PT call stack, making this output more ftrace-like (Adrian Hunter, Andi Kleen) - Allow dumping the object files generated by llvm when processing eBPF scriptlet events (Wang Nan) - Add stackcollapse.py script to help generating flame graphs (Paolo Bonzini) - Add --ldlat option to 'perf mem' to specify load latency for loads event (e.g. cpu/mem-loads/ ) (Jiri Olsa) - Tooling support for Intel TopDown counters, recently added to the kernel (Andi Kleen)" * 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (303 commits) perf tests: Add is_printable_array test perf tools: Make is_printable_array global perf script python: Fix string vs byte array resolving perf probe: Warn unmatched function filter correctly perf cpu_map: Add more helpers perf stat: Balance opening and reading events tools: Copy linux/{hash,poison}.h and check for drift perf tools: Remove include/linux/list.h from perf's MANIFEST tools: Copy the bitops files accessed from the kernel and check for drift Remove: kernel unistd*h files from perf's MANIFEST, not used perf tools: Remove tools/perf/util/include/linux/const.h perf tools: Remove tools/perf/util/include/asm/byteorder.h perf tools: Add missing linux/compiler.h include to perf-sys.h perf jit: Remove some no-op error handling perf jit: Add missing curly braces objtool: Initialize variable to silence old compiler objtool: Add -I$(srctree)/tools/arch/$(ARCH)/include/uapi perf record: Add --tail-synthesize option perf session: Don't warn about out of order event if write_backward is used perf tools: Enable overwrite settings ...
Diffstat (limited to 'tools/perf/util/demangle-rust.c')
-rw-r--r--tools/perf/util/demangle-rust.c269
1 files changed, 269 insertions, 0 deletions
diff --git a/tools/perf/util/demangle-rust.c b/tools/perf/util/demangle-rust.c
new file mode 100644
index 000000000000..f9dafa888c06
--- /dev/null
+++ b/tools/perf/util/demangle-rust.c
@@ -0,0 +1,269 @@
+#include <string.h>
+#include "util.h"
+#include "debug.h"
+
+#include "demangle-rust.h"
+
+/*
+ * Mangled Rust symbols look like this:
+ *
+ * _$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a
+ *
+ * The original symbol is:
+ *
+ * <std::sys::fd::FileDesc as core::ops::Drop>::drop
+ *
+ * The last component of the path is a 64-bit hash in lowercase hex, prefixed
+ * with "h". Rust does not have a global namespace between crates, an illusion
+ * which Rust maintains by using the hash to distinguish things that would
+ * otherwise have the same symbol.
+ *
+ * Any path component not starting with a XID_Start character is prefixed with
+ * "_".
+ *
+ * The following escape sequences are used:
+ *
+ * "," => $C$
+ * "@" => $SP$
+ * "*" => $BP$
+ * "&" => $RF$
+ * "<" => $LT$
+ * ">" => $GT$
+ * "(" => $LP$
+ * ")" => $RP$
+ * " " => $u20$
+ * "'" => $u27$
+ * "[" => $u5b$
+ * "]" => $u5d$
+ * "~" => $u7e$
+ *
+ * A double ".." means "::" and a single "." means "-".
+ *
+ * The only characters allowed in the mangled symbol are a-zA-Z0-9 and _.:$
+ */
+
+static const char *hash_prefix = "::h";
+static const size_t hash_prefix_len = 3;
+static const size_t hash_len = 16;
+
+static bool is_prefixed_hash(const char *start);
+static bool looks_like_rust(const char *sym, size_t len);
+static bool unescape(const char **in, char **out, const char *seq, char value);
+
+/*
+ * INPUT:
+ * sym: symbol that has been through BFD-demangling
+ *
+ * This function looks for the following indicators:
+ *
+ * 1. The hash must consist of "h" followed by 16 lowercase hex digits.
+ *
+ * 2. As a sanity check, the hash must use between 5 and 15 of the 16 possible
+ * hex digits. This is true of 99.9998% of hashes so once in your life you
+ * may see a false negative. The point is to notice path components that
+ * could be Rust hashes but are probably not, like "haaaaaaaaaaaaaaaa". In
+ * this case a false positive (non-Rust symbol has an important path
+ * component removed because it looks like a Rust hash) is worse than a
+ * false negative (the rare Rust symbol is not demangled) so this sets the
+ * balance in favor of false negatives.
+ *
+ * 3. There must be no characters other than a-zA-Z0-9 and _.:$
+ *
+ * 4. There must be no unrecognized $-sign sequences.
+ *
+ * 5. There must be no sequence of three or more dots in a row ("...").
+ */
+bool
+rust_is_mangled(const char *sym)
+{
+ size_t len, len_without_hash;
+
+ if (!sym)
+ return false;
+
+ len = strlen(sym);
+ if (len <= hash_prefix_len + hash_len)
+ /* Not long enough to contain "::h" + hash + something else */
+ return false;
+
+ len_without_hash = len - (hash_prefix_len + hash_len);
+ if (!is_prefixed_hash(sym + len_without_hash))
+ return false;
+
+ return looks_like_rust(sym, len_without_hash);
+}
+
+/*
+ * A hash is the prefix "::h" followed by 16 lowercase hex digits. The hex
+ * digits must comprise between 5 and 15 (inclusive) distinct digits.
+ */
+static bool is_prefixed_hash(const char *str)
+{
+ const char *end;
+ bool seen[16];
+ size_t i;
+ int count;
+
+ if (strncmp(str, hash_prefix, hash_prefix_len))
+ return false;
+ str += hash_prefix_len;
+
+ memset(seen, false, sizeof(seen));
+ for (end = str + hash_len; str < end; str++)
+ if (*str >= '0' && *str <= '9')
+ seen[*str - '0'] = true;
+ else if (*str >= 'a' && *str <= 'f')
+ seen[*str - 'a' + 10] = true;
+ else
+ return false;
+
+ /* Count how many distinct digits seen */
+ count = 0;
+ for (i = 0; i < 16; i++)
+ if (seen[i])
+ count++;
+
+ return count >= 5 && count <= 15;
+}
+
+static bool looks_like_rust(const char *str, size_t len)
+{
+ const char *end = str + len;
+
+ while (str < end)
+ switch (*str) {
+ case '$':
+ if (!strncmp(str, "$C$", 3))
+ str += 3;
+ else if (!strncmp(str, "$SP$", 4)
+ || !strncmp(str, "$BP$", 4)
+ || !strncmp(str, "$RF$", 4)
+ || !strncmp(str, "$LT$", 4)
+ || !strncmp(str, "$GT$", 4)
+ || !strncmp(str, "$LP$", 4)
+ || !strncmp(str, "$RP$", 4))
+ str += 4;
+ else if (!strncmp(str, "$u20$", 5)
+ || !strncmp(str, "$u27$", 5)
+ || !strncmp(str, "$u5b$", 5)
+ || !strncmp(str, "$u5d$", 5)
+ || !strncmp(str, "$u7e$", 5))
+ str += 5;
+ else
+ return false;
+ break;
+ case '.':
+ /* Do not allow three or more consecutive dots */
+ if (!strncmp(str, "...", 3))
+ return false;
+ /* Fall through */
+ case 'a' ... 'z':
+ case 'A' ... 'Z':
+ case '0' ... '9':
+ case '_':
+ case ':':
+ str++;
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * INPUT:
+ * sym: symbol for which rust_is_mangled(sym) returns true
+ *
+ * The input is demangled in-place because the mangled name is always longer
+ * than the demangled one.
+ */
+void
+rust_demangle_sym(char *sym)
+{
+ const char *in;
+ char *out;
+ const char *end;
+
+ if (!sym)
+ return;
+
+ in = sym;
+ out = sym;
+ end = sym + strlen(sym) - (hash_prefix_len + hash_len);
+
+ while (in < end)
+ switch (*in) {
+ case '$':
+ if (!(unescape(&in, &out, "$C$", ',')
+ || unescape(&in, &out, "$SP$", '@')
+ || unescape(&in, &out, "$BP$", '*')
+ || unescape(&in, &out, "$RF$", '&')
+ || unescape(&in, &out, "$LT$", '<')
+ || unescape(&in, &out, "$GT$", '>')
+ || unescape(&in, &out, "$LP$", '(')
+ || unescape(&in, &out, "$RP$", ')')
+ || unescape(&in, &out, "$u20$", ' ')
+ || unescape(&in, &out, "$u27$", '\'')
+ || unescape(&in, &out, "$u5b$", '[')
+ || unescape(&in, &out, "$u5d$", ']')
+ || unescape(&in, &out, "$u7e$", '~'))) {
+ pr_err("demangle-rust: unexpected escape sequence");
+ goto done;
+ }
+ break;
+ case '_':
+ /*
+ * If this is the start of a path component and the next
+ * character is an escape sequence, ignore the
+ * underscore. The mangler inserts an underscore to make
+ * sure the path component begins with a XID_Start
+ * character.
+ */
+ if ((in == sym || in[-1] == ':') && in[1] == '$')
+ in++;
+ else
+ *out++ = *in++;
+ break;
+ case '.':
+ if (in[1] == '.') {
+ /* ".." becomes "::" */
+ *out++ = ':';
+ *out++ = ':';
+ in += 2;
+ } else {
+ /* "." becomes "-" */
+ *out++ = '-';
+ in++;
+ }
+ break;
+ case 'a' ... 'z':
+ case 'A' ... 'Z':
+ case '0' ... '9':
+ case ':':
+ *out++ = *in++;
+ break;
+ default:
+ pr_err("demangle-rust: unexpected character '%c' in symbol\n",
+ *in);
+ goto done;
+ }
+
+done:
+ *out = '\0';
+}
+
+static bool unescape(const char **in, char **out, const char *seq, char value)
+{
+ size_t len = strlen(seq);
+
+ if (strncmp(*in, seq, len))
+ return false;
+
+ **out = value;
+
+ *in += len;
+ *out += 1;
+
+ return true;
+}