summaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/hv/Makefile2
-rw-r--r--tools/hv/hv_vss_daemon.c10
-rw-r--r--tools/perf/Documentation/perf-kmem.txt8
-rw-r--r--tools/perf/builtin-kmem.c500
-rw-r--r--tools/perf/util/probe-event.c60
-rw-r--r--tools/perf/util/probe-finder.c73
-rw-r--r--tools/perf/util/probe-finder.h4
-rw-r--r--tools/power/x86/turbostat/Makefile6
-rw-r--r--tools/power/x86/turbostat/turbostat.8138
-rw-r--r--tools/power/x86/turbostat/turbostat.c436
-rw-r--r--tools/testing/selftests/x86/Makefile2
-rw-r--r--tools/testing/selftests/x86/run_x86_tests.sh2
-rw-r--r--tools/testing/selftests/x86/single_step_syscall.c181
13 files changed, 1179 insertions, 243 deletions
diff --git a/tools/hv/Makefile b/tools/hv/Makefile
index 99ffe61051a7..a8ab79556926 100644
--- a/tools/hv/Makefile
+++ b/tools/hv/Makefile
@@ -3,7 +3,7 @@
CC = $(CROSS_COMPILE)gcc
PTHREAD_LIBS = -lpthread
WARNINGS = -Wall -Wextra
-CFLAGS = $(WARNINGS) -g $(PTHREAD_LIBS)
+CFLAGS = $(WARNINGS) -g $(PTHREAD_LIBS) $(shell getconf LFS_CFLAGS)
all: hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon
%: %.c
diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c
index 5e63f70bd956..506dd0148828 100644
--- a/tools/hv/hv_vss_daemon.c
+++ b/tools/hv/hv_vss_daemon.c
@@ -81,6 +81,7 @@ static int vss_operate(int operation)
char match[] = "/dev/";
FILE *mounts;
struct mntent *ent;
+ char errdir[1024] = {0};
unsigned int cmd;
int error = 0, root_seen = 0, save_errno = 0;
@@ -115,6 +116,8 @@ static int vss_operate(int operation)
goto err;
}
+ endmntent(mounts);
+
if (root_seen) {
error |= vss_do_freeze("/", cmd);
if (error && operation == VSS_OP_FREEZE)
@@ -124,16 +127,19 @@ static int vss_operate(int operation)
goto out;
err:
save_errno = errno;
+ if (ent) {
+ strncpy(errdir, ent->mnt_dir, sizeof(errdir)-1);
+ endmntent(mounts);
+ }
vss_operate(VSS_OP_THAW);
/* Call syslog after we thaw all filesystems */
if (ent)
syslog(LOG_ERR, "FREEZE of %s failed; error:%d %s",
- ent->mnt_dir, save_errno, strerror(save_errno));
+ errdir, save_errno, strerror(save_errno));
else
syslog(LOG_ERR, "FREEZE of / failed; error:%d %s", save_errno,
strerror(save_errno));
out:
- endmntent(mounts);
return error;
}
diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt
index 150253cc3c97..23219c65c16f 100644
--- a/tools/perf/Documentation/perf-kmem.txt
+++ b/tools/perf/Documentation/perf-kmem.txt
@@ -3,7 +3,7 @@ perf-kmem(1)
NAME
----
-perf-kmem - Tool to trace/measure kernel memory(slab) properties
+perf-kmem - Tool to trace/measure kernel memory properties
SYNOPSIS
--------
@@ -46,6 +46,12 @@ OPTIONS
--raw-ip::
Print raw ip instead of symbol
+--slab::
+ Analyze SLAB allocator events.
+
+--page::
+ Analyze page allocator events
+
SEE ALSO
--------
linkperf:perf-record[1]
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 4ebf65c79434..63ea01349b6e 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -22,6 +22,11 @@
#include <linux/string.h>
#include <locale.h>
+static int kmem_slab;
+static int kmem_page;
+
+static long kmem_page_size;
+
struct alloc_stat;
typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);
@@ -226,6 +231,244 @@ static int perf_evsel__process_free_event(struct perf_evsel *evsel,
return 0;
}
+static u64 total_page_alloc_bytes;
+static u64 total_page_free_bytes;
+static u64 total_page_nomatch_bytes;
+static u64 total_page_fail_bytes;
+static unsigned long nr_page_allocs;
+static unsigned long nr_page_frees;
+static unsigned long nr_page_fails;
+static unsigned long nr_page_nomatch;
+
+static bool use_pfn;
+
+#define MAX_MIGRATE_TYPES 6
+#define MAX_PAGE_ORDER 11
+
+static int order_stats[MAX_PAGE_ORDER][MAX_MIGRATE_TYPES];
+
+struct page_stat {
+ struct rb_node node;
+ u64 page;
+ int order;
+ unsigned gfp_flags;
+ unsigned migrate_type;
+ u64 alloc_bytes;
+ u64 free_bytes;
+ int nr_alloc;
+ int nr_free;
+};
+
+static struct rb_root page_tree;
+static struct rb_root page_alloc_tree;
+static struct rb_root page_alloc_sorted;
+
+static struct page_stat *search_page(unsigned long page, bool create)
+{
+ struct rb_node **node = &page_tree.rb_node;
+ struct rb_node *parent = NULL;
+ struct page_stat *data;
+
+ while (*node) {
+ s64 cmp;
+
+ parent = *node;
+ data = rb_entry(*node, struct page_stat, node);
+
+ cmp = data->page - page;
+ if (cmp < 0)
+ node = &parent->rb_left;
+ else if (cmp > 0)
+ node = &parent->rb_right;
+ else
+ return data;
+ }
+
+ if (!create)
+ return NULL;
+
+ data = zalloc(sizeof(*data));
+ if (data != NULL) {
+ data->page = page;
+
+ rb_link_node(&data->node, parent, node);
+ rb_insert_color(&data->node, &page_tree);
+ }
+
+ return data;
+}
+
+static int page_stat_cmp(struct page_stat *a, struct page_stat *b)
+{
+ if (a->page > b->page)
+ return -1;
+ if (a->page < b->page)
+ return 1;
+ if (a->order > b->order)
+ return -1;
+ if (a->order < b->order)
+ return 1;
+ if (a->migrate_type > b->migrate_type)
+ return -1;
+ if (a->migrate_type < b->migrate_type)
+ return 1;
+ if (a->gfp_flags > b->gfp_flags)
+ return -1;
+ if (a->gfp_flags < b->gfp_flags)
+ return 1;
+ return 0;
+}
+
+static struct page_stat *search_page_alloc_stat(struct page_stat *stat, bool create)
+{
+ struct rb_node **node = &page_alloc_tree.rb_node;
+ struct rb_node *parent = NULL;
+ struct page_stat *data;
+
+ while (*node) {
+ s64 cmp;
+
+ parent = *node;
+ data = rb_entry(*node, struct page_stat, node);
+
+ cmp = page_stat_cmp(data, stat);
+ if (cmp < 0)
+ node = &parent->rb_left;
+ else if (cmp > 0)
+ node = &parent->rb_right;
+ else
+ return data;
+ }
+
+ if (!create)
+ return NULL;
+
+ data = zalloc(sizeof(*data));
+ if (data != NULL) {
+ data->page = stat->page;
+ data->order = stat->order;
+ data->gfp_flags = stat->gfp_flags;
+ data->migrate_type = stat->migrate_type;
+
+ rb_link_node(&data->node, parent, node);
+ rb_insert_color(&data->node, &page_alloc_tree);
+ }
+
+ return data;
+}
+
+static bool valid_page(u64 pfn_or_page)
+{
+ if (use_pfn && pfn_or_page == -1UL)
+ return false;
+ if (!use_pfn && pfn_or_page == 0)
+ return false;
+ return true;
+}
+
+static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ u64 page;
+ unsigned int order = perf_evsel__intval(evsel, sample, "order");
+ unsigned int gfp_flags = perf_evsel__intval(evsel, sample, "gfp_flags");
+ unsigned int migrate_type = perf_evsel__intval(evsel, sample,
+ "migratetype");
+ u64 bytes = kmem_page_size << order;
+ struct page_stat *stat;
+ struct page_stat this = {
+ .order = order,
+ .gfp_flags = gfp_flags,
+ .migrate_type = migrate_type,
+ };
+
+ if (use_pfn)
+ page = perf_evsel__intval(evsel, sample, "pfn");
+ else
+ page = perf_evsel__intval(evsel, sample, "page");
+
+ nr_page_allocs++;
+ total_page_alloc_bytes += bytes;
+
+ if (!valid_page(page)) {
+ nr_page_fails++;
+ total_page_fail_bytes += bytes;
+
+ return 0;
+ }
+
+ /*
+ * This is to find the current page (with correct gfp flags and
+ * migrate type) at free event.
+ */
+ stat = search_page(page, true);
+ if (stat == NULL)
+ return -ENOMEM;
+
+ stat->order = order;
+ stat->gfp_flags = gfp_flags;
+ stat->migrate_type = migrate_type;
+
+ this.page = page;
+ stat = search_page_alloc_stat(&this, true);
+ if (stat == NULL)
+ return -ENOMEM;
+
+ stat->nr_alloc++;
+ stat->alloc_bytes += bytes;
+
+ order_stats[order][migrate_type]++;
+
+ return 0;
+}
+
+static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ u64 page;
+ unsigned int order = perf_evsel__intval(evsel, sample, "order");
+ u64 bytes = kmem_page_size << order;
+ struct page_stat *stat;
+ struct page_stat this = {
+ .order = order,
+ };
+
+ if (use_pfn)
+ page = perf_evsel__intval(evsel, sample, "pfn");
+ else
+ page = perf_evsel__intval(evsel, sample, "page");
+
+ nr_page_frees++;
+ total_page_free_bytes += bytes;
+
+ stat = search_page(page, false);
+ if (stat == NULL) {
+ pr_debug2("missing free at page %"PRIx64" (order: %d)\n",
+ page, order);
+
+ nr_page_nomatch++;
+ total_page_nomatch_bytes += bytes;
+
+ return 0;
+ }
+
+ this.page = page;
+ this.gfp_flags = stat->gfp_flags;
+ this.migrate_type = stat->migrate_type;
+
+ rb_erase(&stat->node, &page_tree);
+ free(stat);
+
+ stat = search_page_alloc_stat(&this, false);
+ if (stat == NULL)
+ return -ENOENT;
+
+ stat->nr_free++;
+ stat->free_bytes += bytes;
+
+ return 0;
+}
+
typedef int (*tracepoint_handler)(struct perf_evsel *evsel,
struct perf_sample *sample);
@@ -270,8 +513,9 @@ static double fragmentation(unsigned long n_req, unsigned long n_alloc)
return 100.0 - (100.0 * n_req / n_alloc);
}
-static void __print_result(struct rb_root *root, struct perf_session *session,
- int n_lines, int is_caller)
+static void __print_slab_result(struct rb_root *root,
+ struct perf_session *session,
+ int n_lines, int is_caller)
{
struct rb_node *next;
struct machine *machine = &session->machines.host;
@@ -323,9 +567,56 @@ static void __print_result(struct rb_root *root, struct perf_session *session,
printf("%.105s\n", graph_dotted_line);
}
-static void print_summary(void)
+static const char * const migrate_type_str[] = {
+ "UNMOVABL",
+ "RECLAIM",
+ "MOVABLE",
+ "RESERVED",
+ "CMA/ISLT",
+ "UNKNOWN",
+};
+
+static void __print_page_result(struct rb_root *root,
+ struct perf_session *session __maybe_unused,
+ int n_lines)
+{
+ struct rb_node *next = rb_first(root);
+ const char *format;
+
+ printf("\n%.80s\n", graph_dotted_line);
+ printf(" %-16s | Total alloc (KB) | Hits | Order | Mig.type | GFP flags\n",
+ use_pfn ? "PFN" : "Page");
+ printf("%.80s\n", graph_dotted_line);
+
+ if (use_pfn)
+ format = " %16llu | %'16llu | %'9d | %5d | %8s | %08lx\n";
+ else
+ format = " %016llx | %'16llu | %'9d | %5d | %8s | %08lx\n";
+
+ while (next && n_lines--) {
+ struct page_stat *data;
+
+ data = rb_entry(next, struct page_stat, node);
+
+ printf(format, (unsigned long long)data->page,
+ (unsigned long long)data->alloc_bytes / 1024,
+ data->nr_alloc, data->order,
+ migrate_type_str[data->migrate_type],
+ (unsigned long)data->gfp_flags);
+
+ next = rb_next(next);
+ }
+
+ if (n_lines == -1)
+ printf(" ... | ... | ... | ... | ... | ... \n");
+
+ printf("%.80s\n", graph_dotted_line);
+}
+
+static void print_slab_summary(void)
{
- printf("\nSUMMARY\n=======\n");
+ printf("\nSUMMARY (SLAB allocator)");
+ printf("\n========================\n");
printf("Total bytes requested: %'lu\n", total_requested);
printf("Total bytes allocated: %'lu\n", total_allocated);
printf("Total bytes wasted on internal fragmentation: %'lu\n",
@@ -335,13 +626,73 @@ static void print_summary(void)
printf("Cross CPU allocations: %'lu/%'lu\n", nr_cross_allocs, nr_allocs);
}
-static void print_result(struct perf_session *session)
+static void print_page_summary(void)
+{
+ int o, m;
+ u64 nr_alloc_freed = nr_page_frees - nr_page_nomatch;
+ u64 total_alloc_freed_bytes = total_page_free_bytes - total_page_nomatch_bytes;
+
+ printf("\nSUMMARY (page allocator)");
+ printf("\n========================\n");
+ printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total allocation requests",
+ nr_page_allocs, total_page_alloc_bytes / 1024);
+ printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total free requests",
+ nr_page_frees, total_page_free_bytes / 1024);
+ printf("\n");
+
+ printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests",
+ nr_alloc_freed, (total_alloc_freed_bytes) / 1024);
+ printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total alloc-only requests",
+ nr_page_allocs - nr_alloc_freed,
+ (total_page_alloc_bytes - total_alloc_freed_bytes) / 1024);
+ printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total free-only requests",
+ nr_page_nomatch, total_page_nomatch_bytes / 1024);
+ printf("\n");
+
+ printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total allocation failures",
+ nr_page_fails, total_page_fail_bytes / 1024);
+ printf("\n");
+
+ printf("%5s %12s %12s %12s %12s %12s\n", "Order", "Unmovable",
+ "Reclaimable", "Movable", "Reserved", "CMA/Isolated");
+ printf("%.5s %.12s %.12s %.12s %.12s %.12s\n", graph_dotted_line,
+ graph_dotted_line, graph_dotted_line, graph_dotted_line,
+ graph_dotted_line, graph_dotted_line);
+
+ for (o = 0; o < MAX_PAGE_ORDER; o++) {
+ printf("%5d", o);
+ for (m = 0; m < MAX_MIGRATE_TYPES - 1; m++) {
+ if (order_stats[o][m])
+ printf(" %'12d", order_stats[o][m]);
+ else
+ printf(" %12c", '.');
+ }
+ printf("\n");
+ }
+}
+
+static void print_slab_result(struct perf_session *session)
{
if (caller_flag)
- __print_result(&root_caller_sorted, session, caller_lines, 1);
+ __print_slab_result(&root_caller_sorted, session, caller_lines, 1);
+ if (alloc_flag)
+ __print_slab_result(&root_alloc_sorted, session, alloc_lines, 0);
+ print_slab_summary();
+}
+
+static void print_page_result(struct perf_session *session)
+{
if (alloc_flag)
- __print_result(&root_alloc_sorted, session, alloc_lines, 0);
- print_summary();
+ __print_page_result(&page_alloc_sorted, session, alloc_lines);
+ print_page_summary();
+}
+
+static void print_result(struct perf_session *session)
+{
+ if (kmem_slab)
+ print_slab_result(session);
+ if (kmem_page)
+ print_page_result(session);
}
struct sort_dimension {
@@ -353,8 +704,8 @@ struct sort_dimension {
static LIST_HEAD(caller_sort);
static LIST_HEAD(alloc_sort);
-static void sort_insert(struct rb_root *root, struct alloc_stat *data,
- struct list_head *sort_list)
+static void sort_slab_insert(struct rb_root *root, struct alloc_stat *data,
+ struct list_head *sort_list)
{
struct rb_node **new = &(root->rb_node);
struct rb_node *parent = NULL;
@@ -383,8 +734,8 @@ static void sort_insert(struct rb_root *root, struct alloc_stat *data,
rb_insert_color(&data->node, root);
}
-static void __sort_result(struct rb_root *root, struct rb_root *root_sorted,
- struct list_head *sort_list)
+static void __sort_slab_result(struct rb_root *root, struct rb_root *root_sorted,
+ struct list_head *sort_list)
{
struct rb_node *node;
struct alloc_stat *data;
@@ -396,26 +747,79 @@ static void __sort_result(struct rb_root *root, struct rb_root *root_sorted,
rb_erase(node, root);
data = rb_entry(node, struct alloc_stat, node);
- sort_insert(root_sorted, data, sort_list);
+ sort_slab_insert(root_sorted, data, sort_list);
+ }
+}
+
+static void sort_page_insert(struct rb_root *root, struct page_stat *data)
+{
+ struct rb_node **new = &root->rb_node;
+ struct rb_node *parent = NULL;
+
+ while (*new) {
+ struct page_stat *this;
+ int cmp = 0;
+
+ this = rb_entry(*new, struct page_stat, node);
+ parent = *new;
+
+ /* TODO: support more sort key */
+ cmp = data->alloc_bytes - this->alloc_bytes;
+
+ if (cmp > 0)
+ new = &parent->rb_left;
+ else
+ new = &parent->rb_right;
+ }
+
+ rb_link_node(&data->node, parent, new);
+ rb_insert_color(&data->node, root);
+}
+
+static void __sort_page_result(struct rb_root *root, struct rb_root *root_sorted)
+{
+ struct rb_node *node;
+ struct page_stat *data;
+
+ for (;;) {
+ node = rb_first(root);
+ if (!node)
+ break;
+
+ rb_erase(node, root);
+ data = rb_entry(node, struct page_stat, node);
+ sort_page_insert(root_sorted, data);
}
}
static void sort_result(void)
{
- __sort_result(&root_alloc_stat, &root_alloc_sorted, &alloc_sort);
- __sort_result(&root_caller_stat, &root_caller_sorted, &caller_sort);
+ if (kmem_slab) {
+ __sort_slab_result(&root_alloc_stat, &root_alloc_sorted,
+ &alloc_sort);
+ __sort_slab_result(&root_caller_stat, &root_caller_sorted,
+ &caller_sort);
+ }
+ if (kmem_page) {
+ __sort_page_result(&page_alloc_tree, &page_alloc_sorted);
+ }
}
static int __cmd_kmem(struct perf_session *session)
{
int err = -EINVAL;
+ struct perf_evsel *evsel;
const struct perf_evsel_str_handler kmem_tracepoints[] = {
+ /* slab allocator */
{ "kmem:kmalloc", perf_evsel__process_alloc_event, },
{ "kmem:kmem_cache_alloc", perf_evsel__process_alloc_event, },
{ "kmem:kmalloc_node", perf_evsel__process_alloc_node_event, },
{ "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event, },
{ "kmem:kfree", perf_evsel__process_free_event, },
{ "kmem:kmem_cache_free", perf_evsel__process_free_event, },
+ /* page allocator */
+ { "kmem:mm_page_alloc", perf_evsel__process_page_alloc_event, },
+ { "kmem:mm_page_free", perf_evsel__process_page_free_event, },
};
if (!perf_session__has_traces(session, "kmem record"))
@@ -426,10 +830,20 @@ static int __cmd_kmem(struct perf_session *session)
goto out;
}
+ evlist__for_each(session->evlist, evsel) {
+ if (!strcmp(perf_evsel__name(evsel), "kmem:mm_page_alloc") &&
+ perf_evsel__field(evsel, "pfn")) {
+ use_pfn = true;
+ break;
+ }
+ }
+
setup_pager();
err = perf_session__process_events(session);
- if (err != 0)
+ if (err != 0) {
+ pr_err("error during process events: %d\n", err);
goto out;
+ }
sort_result();
print_result(session);
out:
@@ -612,6 +1026,22 @@ static int parse_alloc_opt(const struct option *opt __maybe_unused,
return 0;
}
+static int parse_slab_opt(const struct option *opt __maybe_unused,
+ const char *arg __maybe_unused,
+ int unset __maybe_unused)
+{
+ kmem_slab = (kmem_page + 1);
+ return 0;
+}
+
+static int parse_page_opt(const struct option *opt __maybe_unused,
+ const char *arg __maybe_unused,
+ int unset __maybe_unused)
+{
+ kmem_page = (kmem_slab + 1);
+ return 0;
+}
+
static int parse_line_opt(const struct option *opt __maybe_unused,
const char *arg, int unset __maybe_unused)
{
@@ -634,6 +1064,8 @@ static int __cmd_record(int argc, const char **argv)
{
const char * const record_args[] = {
"record", "-a", "-R", "-c", "1",
+ };
+ const char * const slab_events[] = {
"-e", "kmem:kmalloc",
"-e", "kmem:kmalloc_node",
"-e", "kmem:kfree",
@@ -641,10 +1073,19 @@ static int __cmd_record(int argc, const char **argv)
"-e", "kmem:kmem_cache_alloc_node",
"-e", "kmem:kmem_cache_free",
};
+ const char * const page_events[] = {
+ "-e", "kmem:mm_page_alloc",
+ "-e", "kmem:mm_page_free",
+ };
unsigned int rec_argc, i, j;
const char **rec_argv;
rec_argc = ARRAY_SIZE(record_args) + argc - 1;
+ if (kmem_slab)
+ rec_argc += ARRAY_SIZE(slab_events);
+ if (kmem_page)
+ rec_argc += ARRAY_SIZE(page_events);
+
rec_argv = calloc(rec_argc + 1, sizeof(char *));
if (rec_argv == NULL)
@@ -653,6 +1094,15 @@ static int __cmd_record(int argc, const char **argv)
for (i = 0; i < ARRAY_SIZE(record_args); i++)
rec_argv[i] = strdup(record_args[i]);
+ if (kmem_slab) {
+ for (j = 0; j < ARRAY_SIZE(slab_events); j++, i++)
+ rec_argv[i] = strdup(slab_events[j]);
+ }
+ if (kmem_page) {
+ for (j = 0; j < ARRAY_SIZE(page_events); j++, i++)
+ rec_argv[i] = strdup(page_events[j]);
+ }
+
for (j = 1; j < (unsigned int)argc; j++, i++)
rec_argv[i] = argv[j];
@@ -679,6 +1129,10 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt),
OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"),
+ OPT_CALLBACK_NOOPT(0, "slab", NULL, NULL, "Analyze slab allocator",
+ parse_slab_opt),
+ OPT_CALLBACK_NOOPT(0, "page", NULL, NULL, "Analyze page allocator",
+ parse_page_opt),
OPT_END()
};
const char *const kmem_subcommands[] = { "record", "stat", NULL };
@@ -695,6 +1149,9 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
if (!argc)
usage_with_options(kmem_usage, kmem_options);
+ if (kmem_slab == 0 && kmem_page == 0)
+ kmem_slab = 1; /* for backward compatibility */
+
if (!strncmp(argv[0], "rec", 3)) {
symbol__init(NULL);
return __cmd_record(argc, argv);
@@ -706,6 +1163,17 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
if (session == NULL)
return -1;
+ if (kmem_page) {
+ struct perf_evsel *evsel = perf_evlist__first(session->evlist);
+
+ if (evsel == NULL || evsel->tp_format == NULL) {
+ pr_err("invalid event found.. aborting\n");
+ return -1;
+ }
+
+ kmem_page_size = pevent_get_page_size(evsel->tp_format->pevent);
+ }
+
symbol__init(&session->header.env);
if (!strcmp(argv[0], "stat")) {
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 30545ce2c712..d8bb616ff57c 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -332,6 +332,7 @@ static int find_alternative_probe_point(struct debuginfo *dinfo,
else {
result->offset += pp->offset;
result->line += pp->line;
+ result->retprobe = pp->retprobe;
ret = 0;
}
@@ -654,65 +655,6 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
return ntevs;
}
-/*
- * Find a src file from a DWARF tag path. Prepend optional source path prefix
- * and chop off leading directories that do not exist. Result is passed back as
- * a newly allocated path on success.
- * Return 0 if file was found and readable, -errno otherwise.
- */
-static int get_real_path(const char *raw_path, const char *comp_dir,
- char **new_path)
-{
- const char *prefix = symbol_conf.source_prefix;
-
- if (!prefix) {
- if (raw_path[0] != '/' && comp_dir)
- /* If not an absolute path, try to use comp_dir */
- prefix = comp_dir;
- else {
- if (access(raw_path, R_OK) == 0) {
- *new_path = strdup(raw_path);
- return *new_path ? 0 : -ENOMEM;
- } else
- return -errno;
- }
- }
-
- *new_path = malloc((strlen(prefix) + strlen(raw_path) + 2));
- if (!*new_path)
- return -ENOMEM;
-
- for (;;) {
- sprintf(*new_path, "%s/%s", prefix, raw_path);
-
- if (access(*new_path, R_OK) == 0)
- return 0;
-
- if (!symbol_conf.source_prefix) {
- /* In case of searching comp_dir, don't retry */
- zfree(new_path);
- return -errno;
- }
-
- switch (errno) {
- case ENAMETOOLONG:
- case ENOENT:
- case EROFS:
- case EFAULT:
- raw_path = strchr(++raw_path, '/');
- if (!raw_path) {
- zfree(new_path);
- return -ENOENT;
- }
- continue;
-
- default:
- zfree(new_path);
- return -errno;
- }
- }
-}
-
#define LINEBUF_SIZE 256
#define NR_ADDITIONAL_LINES 2
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index e3074230f236..b5bf9d5efeaf 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -855,11 +855,22 @@ static int probe_point_lazy_walker(const char *fname, int lineno,
static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
{
int ret = 0;
+ char *fpath;
if (intlist__empty(pf->lcache)) {
+ const char *comp_dir;
+
+ comp_dir = cu_get_comp_dir(&pf->cu_die);
+ ret = get_real_path(pf->fname, comp_dir, &fpath);
+ if (ret < 0) {
+ pr_warning("Failed to find source file path.\n");
+ return ret;
+ }
+
/* Matching lazy line pattern */
- ret = find_lazy_match_lines(pf->lcache, pf->fname,
+ ret = find_lazy_match_lines(pf->lcache, fpath,
pf->pev->point.lazy_line);
+ free(fpath);
if (ret <= 0)
return ret;
}
@@ -1055,7 +1066,7 @@ static int debuginfo__find_probes(struct debuginfo *dbg,
if (pp->function)
ret = find_probe_point_by_func(pf);
else if (pp->lazy_line)
- ret = find_probe_point_lazy(NULL, pf);
+ ret = find_probe_point_lazy(&pf->cu_die, pf);
else {
pf->lno = pp->line;
ret = find_probe_point_by_line(pf);
@@ -1622,3 +1633,61 @@ found:
return (ret < 0) ? ret : lf.found;
}
+/*
+ * Find a src file from a DWARF tag path. Prepend optional source path prefix
+ * and chop off leading directories that do not exist. Result is passed back as
+ * a newly allocated path on success.
+ * Return 0 if file was found and readable, -errno otherwise.
+ */
+int get_real_path(const char *raw_path, const char *comp_dir,
+ char **new_path)
+{
+ const char *prefix = symbol_conf.source_prefix;
+
+ if (!prefix) {
+ if (raw_path[0] != '/' && comp_dir)
+ /* If not an absolute path, try to use comp_dir */
+ prefix = comp_dir;
+ else {
+ if (access(raw_path, R_OK) == 0) {
+ *new_path = strdup(raw_path);
+ return *new_path ? 0 : -ENOMEM;
+ } else
+ return -errno;
+ }
+ }
+
+ *new_path = malloc((strlen(prefix) + strlen(raw_path) + 2));
+ if (!*new_path)
+ return -ENOMEM;
+
+ for (;;) {
+ sprintf(*new_path, "%s/%s", prefix, raw_path);
+
+ if (access(*new_path, R_OK) == 0)
+ return 0;
+
+ if (!symbol_conf.source_prefix) {
+ /* In case of searching comp_dir, don't retry */
+ zfree(new_path);
+ return -errno;
+ }
+
+ switch (errno) {
+ case ENAMETOOLONG:
+ case ENOENT:
+ case EROFS:
+ case EFAULT:
+ raw_path = strchr(++raw_path, '/');
+ if (!raw_path) {
+ zfree(new_path);
+ return -ENOENT;
+ }
+ continue;
+
+ default:
+ zfree(new_path);
+ return -errno;
+ }
+ }
+}
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index 92590b2c7e1c..ebf8c8c81453 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -55,6 +55,10 @@ extern int debuginfo__find_available_vars_at(struct debuginfo *dbg,
struct variable_list **vls,
int max_points, bool externs);
+/* Find a src file from a DWARF tag path */
+int get_real_path(const char *raw_path, const char *comp_dir,
+ char **new_path);
+
struct probe_finder {
struct perf_probe_event *pev; /* Target probe event */
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
index d1b3a361e526..4039854560d0 100644
--- a/tools/power/x86/turbostat/Makefile
+++ b/tools/power/x86/turbostat/Makefile
@@ -1,8 +1,12 @@
CC = $(CROSS_COMPILE)gcc
-BUILD_OUTPUT := $(PWD)
+BUILD_OUTPUT := $(CURDIR)
PREFIX := /usr
DESTDIR :=
+ifeq ("$(origin O)", "command line")
+ BUILD_OUTPUT := $(O)
+endif
+
turbostat : turbostat.c
CFLAGS += -Wall
CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/uapi/asm/msr-index.h"'
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index feea7ad9500b..05b8fc38dc8b 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -20,9 +20,11 @@ upon its completion.
The second method is to omit the command,
and turbostat displays statistics every 5 seconds.
The 5-second interval can be changed using the --interval option.
-
+.PP
Some information is not available on older processors.
.SS Options
+Options can be specified with a single or double '-', and only as much of the option
+name as necessary to disambiguate it from others is necessary. Note that options are case-sensitive.
\fB--Counter MSR#\fP shows the delta of the specified 64-bit MSR counter.
.PP
\fB--counter MSR#\fP shows the delta of the specified 32-bit MSR counter.
@@ -55,16 +57,20 @@ more than once may also enable internal turbostat debug information.
The \fBcommand\fP parameter forks \fBcommand\fP, and upon its exit,
displays the statistics gathered since it was forked.
.PP
-.SH FIELD DESCRIPTIONS
+.SH DEFAULT FIELD DESCRIPTIONS
.nf
-\fBPackage\fP processor package number.
-\fBCore\fP processor core number.
-\fBCPU\fP Linux CPU (logical processor) number.
-Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology.
+\fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together.
\fBAVG_MHz\fP number of cycles executed divided by time elapsed.
\fB%Busy\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state.
\fBBzy_MHz\fP average clock rate while the CPU was busy (in "c0" state).
\fBTSC_MHz\fP average MHz that the TSC ran during the entire interval.
+.fi
+.PP
+.SH DEBUG FIELD DESCRIPTIONS
+.nf
+\fBPackage\fP processor package number.
+\fBCore\fP processor core number.
+Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology (HT).
\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states.
\fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor.
\fBPkgTtmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor.
@@ -81,63 +87,76 @@ Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading T
Without any parameters, turbostat displays statistics ever 5 seconds.
(override interval with "-i sec" option, or specify a command
for turbostat to fork).
+.nf
+[root@hsw]# ./turbostat
+ CPU Avg_MHz %Busy Bzy_MHz TSC_MHz
+ - 488 12.51 3898 3498
+ 0 0 0.01 3885 3498
+ 4 3897 99.99 3898 3498
+ 1 0 0.00 3861 3498
+ 5 0 0.00 3882 3498
+ 2 1 0.02 3894 3498
+ 6 2 0.06 3898 3498
+ 3 0 0.00 3849 3498
+ 7 0 0.00 3877 3498
+
+.fi
+.SH DEBUG EXAMPLE
+The "--debug" option prints additional system information before measurements:
The first row of statistics is a summary for the entire system.
For residency % columns, the summary is a weighted average.
For Temperature columns, the summary is the column maximum.
For Watts columns, the summary is a system total.
Subsequent rows show per-CPU statistics.
-
-.nf
-[root@ivy]# ./turbostat
- Core CPU Avg_MHz %Busy Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt
- - - 6 0.36 1596 3492 0 0.59 0.01 99.04 0.00 23 24 23.82 0.01 72.47 0.00 6.40 1.01 0.00
- 0 0 9 0.58 1596 3492 0 0.28 0.01 99.13 0.00 23 24 23.82 0.01 72.47 0.00 6.40 1.01 0.00
- 0 4 1 0.07 1596 3492 0 0.79
- 1 1 10 0.65 1596 3492 0 0.59 0.00 98.76 0.00 23
- 1 5 5 0.28 1596 3492 0 0.95
- 2 2 10 0.66 1596 3492 0 0.41 0.01 98.92 0.00 23
- 2 6 2 0.10 1597 3492 0 0.97
- 3 3 3 0.20 1596 3492 0 0.44 0.00 99.37 0.00 23
- 3 7 5 0.31 1596 3492 0 0.33
-.fi
-.SH DEBUG EXAMPLE
-The "--debug" option prints additional system information before measurements:
-
.nf
-turbostat version 4.0 10-Feb, 2015 - Len Brown <lenb@kernel.org>
-CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3a:9 (6:58:9)
+turbostat version 4.1 10-Feb, 2015 - Len Brown <lenb@kernel.org>
+CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3c:3 (6:60:3)
CPUID(6): APERF, DTS, PTM, EPB
-RAPL: 851 sec. Joule Counter Range, at 77 Watts
-cpu0: MSR_NHM_PLATFORM_INFO: 0x81010f0012300
-16 * 100 = 1600 MHz max efficiency
+RAPL: 3121 sec. Joule Counter Range, at 84 Watts
+cpu0: MSR_NHM_PLATFORM_INFO: 0x80838f3012300
+8 * 100 = 800 MHz max efficiency
35 * 100 = 3500 MHz TSC frequency
-cpu0: MSR_IA32_POWER_CTL: 0x0014005d (C1E auto-promotion: DISabled)
-cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e008402 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, locked: pkg-cstate-limit=2: pc6n)
+cpu0: MSR_IA32_POWER_CTL: 0x0004005d (C1E auto-promotion: DISabled)
+cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e000400 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, UNlocked: pkg-cstate-limit=0: pc0)
cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x25262727
37 * 100 = 3700 MHz max turbo 4 active cores
38 * 100 = 3800 MHz max turbo 3 active cores
39 * 100 = 3900 MHz max turbo 2 active cores
39 * 100 = 3900 MHz max turbo 1 active cores
cpu0: MSR_IA32_ENERGY_PERF_BIAS: 0x00000006 (balanced)
-cpu0: MSR_RAPL_POWER_UNIT: 0x000a1003 (0.125000 Watts, 0.000015 Joules, 0.000977 sec.)
-cpu0: MSR_PKG_POWER_INFO: 0x01e00268 (77 W TDP, RAPL 60 - 0 W, 0.000000 sec.)
-cpu0: MSR_PKG_POWER_LIMIT: 0x30000148268 (UNlocked)
-cpu0: PKG Limit #1: ENabled (77.000000 Watts, 1.000000 sec, clamp DISabled)
-cpu0: PKG Limit #2: DISabled (96.000000 Watts, 0.000977* sec, clamp DISabled)
+cpu0: MSR_CORE_PERF_LIMIT_REASONS, 0x31200000 (Active: ) (Logged: Auto-HWP, Amps, MultiCoreTurbo, Transitions, )
+cpu0: MSR_GFX_PERF_LIMIT_REASONS, 0x00000000 (Active: ) (Logged: )
+cpu0: MSR_RING_PERF_LIMIT_REASONS, 0x0d000000 (Active: ) (Logged: Amps, PkgPwrL1, PkgPwrL2, )
+cpu0: MSR_RAPL_POWER_UNIT: 0x000a0e03 (0.125000 Watts, 0.000061 Joules, 0.000977 sec.)
+cpu0: MSR_PKG_POWER_INFO: 0x000002a0 (84 W TDP, RAPL 0 - 0 W, 0.000000 sec.)
+cpu0: MSR_PKG_POWER_LIMIT: 0x428348001a82a0 (UNlocked)
+cpu0: PKG Limit #1: ENabled (84.000000 Watts, 8.000000 sec, clamp DISabled)
+cpu0: PKG Limit #2: ENabled (105.000000 Watts, 0.002441* sec, clamp DISabled)
cpu0: MSR_PP0_POLICY: 0
cpu0: MSR_PP0_POWER_LIMIT: 0x00000000 (UNlocked)
cpu0: Cores Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled)
cpu0: MSR_PP1_POLICY: 0
cpu0: MSR_PP1_POWER_LIMIT: 0x00000000 (UNlocked)
cpu0: GFX Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled)
-cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00691400 (105 C)
-cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x884e0000 (27 C)
-cpu0: MSR_IA32_THERM_STATUS: 0x88580000 (17 C +/- 1)
-cpu1: MSR_IA32_THERM_STATUS: 0x885a0000 (15 C +/- 1)
-cpu2: MSR_IA32_THERM_STATUS: 0x88570000 (18 C +/- 1)
-cpu3: MSR_IA32_THERM_STATUS: 0x884e0000 (27 C +/- 1)
- ...
+cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00641400 (100 C)
+cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x88340800 (48 C)
+cpu0: MSR_IA32_THERM_STATUS: 0x88340000 (48 C +/- 1)
+cpu1: MSR_IA32_THERM_STATUS: 0x88440000 (32 C +/- 1)
+cpu2: MSR_IA32_THERM_STATUS: 0x88450000 (31 C +/- 1)
+cpu3: MSR_IA32_THERM_STATUS: 0x88490000 (27 C +/- 1)
+ Core CPU Avg_MHz %Busy Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp PkgWatt CorWatt GFXWatt
+ - - 493 12.64 3898 3498 0 12.64 0.00 0.00 74.72 47 47 21.62 13.74 0.00
+ 0 0 4 0.11 3894 3498 0 99.89 0.00 0.00 0.00 47 47 21.62 13.74 0.00
+ 0 4 3897 99.98 3898 3498 0 0.02
+ 1 1 7 0.17 3887 3498 0 0.04 0.00 0.00 99.79 32
+ 1 5 0 0.00 3885 3498 0 0.21
+ 2 2 29 0.76 3895 3498 0 0.10 0.01 0.01 99.13 32
+ 2 6 2 0.06 3896 3498 0 0.80
+ 3 3 1 0.02 3832 3498 0 0.03 0.00 0.00 99.95 28
+ 3 7 0 0.00 3879 3498 0 0.04
+^C
+
.fi
The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency
available at the minimum package voltage. The \fBTSC frequency\fP is the base
@@ -147,6 +166,9 @@ should be sustainable on all CPUs indefinitely, given nominal power and cooling.
The remaining rows show what maximum turbo frequency is possible
depending on the number of idle cores. Note that not all information is
available on all processors.
+.PP
+The --debug option adds additional columns to the measurement ouput, including CPU idle power-state residency processor temperature sensor readinds.
+See the field definitions above.
.SH FORK EXAMPLE
If turbostat is invoked with a command, it will fork that command
and output the statistics gathered when the command exits.
@@ -154,27 +176,23 @@ eg. Here a cycle soaker is run on 1 CPU (see %c0) for a few seconds
until ^C while the other CPUs are mostly idle:
.nf
-root@ivy: turbostat cat /dev/zero > /dev/null
+root@hsw: turbostat cat /dev/zero > /dev/null
^C
- Core CPU Avg_MHz %Busy Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt
- - - 496 12.75 3886 3492 0 13.16 0.04 74.04 0.00 36 36 0.00 0.00 0.00 0.00 23.15 17.65 0.00
- 0 0 22 0.57 3830 3492 0 0.83 0.02 98.59 0.00 27 36 0.00 0.00 0.00 0.00 23.15 17.65 0.00
- 0 4 9 0.24 3829 3492 0 1.15
- 1 1 4 0.09 3783 3492 0 99.91 0.00 0.00 0.00 36
- 1 5 3880 99.82 3888 3492 0 0.18
- 2 2 17 0.44 3813 3492 0 0.77 0.04 98.75 0.00 28
- 2 6 12 0.32 3823 3492 0 0.89
- 3 3 16 0.43 3844 3492 0 0.63 0.11 98.84 0.00 30
- 3 7 4 0.11 3827 3492 0 0.94
-30.372243 sec
+ CPU Avg_MHz %Busy Bzy_MHz TSC_MHz
+ - 482 12.51 3854 3498
+ 0 0 0.01 1960 3498
+ 4 0 0.00 2128 3498
+ 1 0 0.00 3003 3498
+ 5 3854 99.98 3855 3498
+ 2 0 0.01 3504 3498
+ 6 3 0.08 3884 3498
+ 3 0 0.00 2553 3498
+ 7 0 0.00 2126 3498
+10.783983 sec
.fi
-Above the cycle soaker drives cpu5 up its 3.8 GHz turbo limit
-while the other processors are generally in various states of idle.
-
-Note that cpu1 and cpu5 are HT siblings within core1.
-As cpu5 is very busy, it prevents its sibling, cpu1,
-from entering a c-state deeper than c1.
+Above the cycle soaker drives cpu5 up its 3.9 GHz turbo limit.
+The first row shows the average MHz and %Busy across all the processors in the system.
Note that the Avg_MHz column reflects the total number of cycles executed
divided by the measurement interval. If the %Busy column is 100%,
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 2d089cac8580..bac98ca3d4ca 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -57,6 +57,7 @@ unsigned int do_pc3;
unsigned int do_pc6;
unsigned int do_pc7;
unsigned int do_c8_c9_c10;
+unsigned int do_skl_residency;
unsigned int do_slm_cstates;
unsigned int use_c1_residency_msr;
unsigned int has_aperf;
@@ -65,8 +66,6 @@ unsigned int units = 1000000; /* MHz etc */
unsigned int genuine_intel;
unsigned int has_invariant_tsc;
unsigned int do_nhm_platform_info;
-unsigned int do_nhm_turbo_ratio_limit;
-unsigned int do_ivt_turbo_ratio_limit;
unsigned int extra_msr_offset32;
unsigned int extra_msr_offset64;
unsigned int extra_delta_offset32;
@@ -84,11 +83,14 @@ unsigned int do_dts;
unsigned int do_ptm;
unsigned int tcc_activation_temp;
unsigned int tcc_activation_temp_override;
-double rapl_power_units, rapl_energy_units, rapl_time_units;
+double rapl_power_units, rapl_time_units;
+double rapl_dram_energy_units, rapl_energy_units;
double rapl_joule_counter_range;
unsigned int do_core_perf_limit_reasons;
unsigned int do_gfx_perf_limit_reasons;
unsigned int do_ring_perf_limit_reasons;
+unsigned int crystal_hz;
+unsigned long long tsc_hz;
#define RAPL_PKG (1 << 0)
/* 0x610 MSR_PKG_POWER_LIMIT */
@@ -101,18 +103,18 @@ unsigned int do_ring_perf_limit_reasons;
#define RAPL_DRAM (1 << 3)
/* 0x618 MSR_DRAM_POWER_LIMIT */
/* 0x619 MSR_DRAM_ENERGY_STATUS */
- /* 0x61c MSR_DRAM_POWER_INFO */
#define RAPL_DRAM_PERF_STATUS (1 << 4)
/* 0x61b MSR_DRAM_PERF_STATUS */
+#define RAPL_DRAM_POWER_INFO (1 << 5)
+ /* 0x61c MSR_DRAM_POWER_INFO */
-#define RAPL_CORES (1 << 5)
+#define RAPL_CORES (1 << 6)
/* 0x638 MSR_PP0_POWER_LIMIT */
/* 0x639 MSR_PP0_ENERGY_STATUS */
-#define RAPL_CORE_POLICY (1 << 6)
+#define RAPL_CORE_POLICY (1 << 7)
/* 0x63a MSR_PP0_POLICY */
-
-#define RAPL_GFX (1 << 7)
+#define RAPL_GFX (1 << 8)
/* 0x640 MSR_PP1_POWER_LIMIT */
/* 0x641 MSR_PP1_ENERGY_STATUS */
/* 0x642 MSR_PP1_POLICY */
@@ -159,6 +161,10 @@ struct pkg_data {
unsigned long long pc8;
unsigned long long pc9;
unsigned long long pc10;
+ unsigned long long pkg_wtd_core_c0;
+ unsigned long long pkg_any_core_c0;
+ unsigned long long pkg_any_gfxe_c0;
+ unsigned long long pkg_both_core_gfxe_c0;
unsigned int package_id;
unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */
unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */
@@ -292,8 +298,7 @@ void print_header(void)
if (has_aperf)
outp += sprintf(outp, " Bzy_MHz");
outp += sprintf(outp, " TSC_MHz");
- if (do_smi)
- outp += sprintf(outp, " SMI");
+
if (extra_delta_offset32)
outp += sprintf(outp, " count 0x%03X", extra_delta_offset32);
if (extra_delta_offset64)
@@ -302,6 +307,13 @@ void print_header(void)
outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset32);
if (extra_msr_offset64)
outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset64);
+
+ if (!debug)
+ goto done;
+
+ if (do_smi)
+ outp += sprintf(outp, " SMI");
+
if (do_nhm_cstates)
outp += sprintf(outp, " CPU%%c1");
if (do_nhm_cstates && !do_slm_cstates)
@@ -316,6 +328,13 @@ void print_header(void)
if (do_ptm)
outp += sprintf(outp, " PkgTmp");
+ if (do_skl_residency) {
+ outp += sprintf(outp, " Totl%%C0");
+ outp += sprintf(outp, " Any%%C0");
+ outp += sprintf(outp, " GFX%%C0");
+ outp += sprintf(outp, " CPUGFX%%");
+ }
+
if (do_pc2)
outp += sprintf(outp, " Pkg%%pc2");
if (do_pc3)
@@ -359,6 +378,7 @@ void print_header(void)
outp += sprintf(outp, " time");
}
+ done:
outp += sprintf(outp, "\n");
}
@@ -396,6 +416,12 @@ int dump_counters(struct thread_data *t, struct core_data *c,
if (p) {
outp += sprintf(outp, "package: %d\n", p->package_id);
+
+ outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
+ outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
+ outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
+ outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
+
outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
if (do_pc3)
outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
@@ -487,10 +513,6 @@ int format_counters(struct thread_data *t, struct core_data *c,
/* TSC_MHz */
outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float);
- /* SMI */
- if (do_smi)
- outp += sprintf(outp, "%8d", t->smi_count);
-
/* delta */
if (extra_delta_offset32)
outp += sprintf(outp, " %11llu", t->extra_delta32);
@@ -506,6 +528,13 @@ int format_counters(struct thread_data *t, struct core_data *c,
if (extra_msr_offset64)
outp += sprintf(outp, " 0x%016llx", t->extra_msr64);
+ if (!debug)
+ goto done;
+
+ /* SMI */
+ if (do_smi)
+ outp += sprintf(outp, "%8d", t->smi_count);
+
if (do_nhm_cstates) {
if (!skip_c1)
outp += sprintf(outp, "%8.2f", 100.0 * t->c1/t->tsc);
@@ -531,9 +560,18 @@ int format_counters(struct thread_data *t, struct core_data *c,
if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
goto done;
+ /* PkgTmp */
if (do_ptm)
outp += sprintf(outp, "%8d", p->pkg_temp_c);
+ /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
+ if (do_skl_residency) {
+ outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc);
+ outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_any_core_c0/t->tsc);
+ outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_any_gfxe_c0/t->tsc);
+ outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_both_core_gfxe_c0/t->tsc);
+ }
+
if (do_pc2)
outp += sprintf(outp, "%8.2f", 100.0 * p->pc2/t->tsc);
if (do_pc3)
@@ -565,7 +603,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
if (do_rapl & RAPL_GFX)
outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float);
if (do_rapl & RAPL_DRAM)
- outp += sprintf(outp, fmt8, p->energy_dram * rapl_energy_units / interval_float);
+ outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units / interval_float);
if (do_rapl & RAPL_PKG_PERF_STATUS)
outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
if (do_rapl & RAPL_DRAM_PERF_STATUS)
@@ -582,7 +620,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
p->energy_gfx * rapl_energy_units);
if (do_rapl & RAPL_DRAM)
outp += sprintf(outp, fmt8,
- p->energy_dram * rapl_energy_units);
+ p->energy_dram * rapl_dram_energy_units);
if (do_rapl & RAPL_PKG_PERF_STATUS)
outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
if (do_rapl & RAPL_DRAM_PERF_STATUS)
@@ -636,6 +674,13 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_
void
delta_package(struct pkg_data *new, struct pkg_data *old)
{
+
+ if (do_skl_residency) {
+ old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
+ old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
+ old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
+ old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
+ }
old->pc2 = new->pc2 - old->pc2;
if (do_pc3)
old->pc3 = new->pc3 - old->pc3;
@@ -782,6 +827,11 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
c->c7 = 0;
c->core_temp_c = 0;
+ p->pkg_wtd_core_c0 = 0;
+ p->pkg_any_core_c0 = 0;
+ p->pkg_any_gfxe_c0 = 0;
+ p->pkg_both_core_gfxe_c0 = 0;
+
p->pc2 = 0;
if (do_pc3)
p->pc3 = 0;
@@ -826,6 +876,13 @@ int sum_counters(struct thread_data *t, struct core_data *c,
if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
return 0;
+ if (do_skl_residency) {
+ average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
+ average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
+ average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
+ average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
+ }
+
average.packages.pc2 += p->pc2;
if (do_pc3)
average.packages.pc3 += p->pc3;
@@ -873,6 +930,13 @@ void compute_average(struct thread_data *t, struct core_data *c,
average.cores.c6 /= topo.num_cores;
average.cores.c7 /= topo.num_cores;
+ if (do_skl_residency) {
+ average.packages.pkg_wtd_core_c0 /= topo.num_packages;
+ average.packages.pkg_any_core_c0 /= topo.num_packages;
+ average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
+ average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
+ }
+
average.packages.pc2 /= topo.num_packages;
if (do_pc3)
average.packages.pc3 /= topo.num_packages;
@@ -979,6 +1043,16 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
return 0;
+ if (do_skl_residency) {
+ if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
+ return -10;
+ if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
+ return -11;
+ if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
+ return -12;
+ if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
+ return -13;
+ }
if (do_pc3)
if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
return -9;
@@ -1055,49 +1129,77 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
#define PCL_6R 9 /* PC6 Retention */
#define PCL__7 10 /* PC7 */
#define PCL_7S 11 /* PC7 Shrink */
-#define PCLUNL 12 /* Unlimited */
+#define PCL__8 12 /* PC8 */
+#define PCL__9 13 /* PC9 */
+#define PCLUNL 14 /* Unlimited */
int pkg_cstate_limit = PCLUKN;
char *pkg_cstate_limit_strings[] = { "reserved", "unknown", "pc0", "pc1", "pc2",
- "pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "unlimited"};
+ "pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "unlimited"};
-int nhm_pkg_cstate_limits[8] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL};
-int snb_pkg_cstate_limits[8] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL};
-int hsw_pkg_cstate_limits[8] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCLRSV, PCLUNL};
-int slv_pkg_cstate_limits[8] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7};
-int amt_pkg_cstate_limits[8] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7};
-int phi_pkg_cstate_limits[8] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL};
+int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+int amt_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
-void print_verbose_header(void)
+static void
+dump_nhm_platform_info(void)
{
unsigned long long msr;
unsigned int ratio;
- if (!do_nhm_platform_info)
- return;
-
get_msr(0, MSR_NHM_PLATFORM_INFO, &msr);
fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr);
ratio = (msr >> 40) & 0xFF;
- fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n",
+ fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency frequency\n",
ratio, bclk, ratio * bclk);
ratio = (msr >> 8) & 0xFF;
- fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n",
+ fprintf(stderr, "%d * %.0f = %.0f MHz base frequency\n",
ratio, bclk, ratio * bclk);
get_msr(0, MSR_IA32_POWER_CTL, &msr);
fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
msr, msr & 0x2 ? "EN" : "DIS");
- if (!do_ivt_turbo_ratio_limit)
- goto print_nhm_turbo_ratio_limits;
+ return;
+}
+
+static void
+dump_hsw_turbo_ratio_limits(void)
+{
+ unsigned long long msr;
+ unsigned int ratio;
+
+ get_msr(0, MSR_TURBO_RATIO_LIMIT2, &msr);
+
+ fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", msr);
+
+ ratio = (msr >> 8) & 0xFF;
+ if (ratio)
+ fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 18 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 0) & 0xFF;
+ if (ratio)
+ fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 17 active cores\n",
+ ratio, bclk, ratio * bclk);
+ return;
+}
+
+static void
+dump_ivt_turbo_ratio_limits(void)
+{
+ unsigned long long msr;
+ unsigned int ratio;
- get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr);
+ get_msr(0, MSR_TURBO_RATIO_LIMIT1, &msr);
- fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
+ fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", msr);
ratio = (msr >> 56) & 0xFF;
if (ratio)
@@ -1138,30 +1240,18 @@ void print_verbose_header(void)
if (ratio)
fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n",
ratio, bclk, ratio * bclk);
+ return;
+}
-print_nhm_turbo_ratio_limits:
- get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
-
-#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
-#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
-
- fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr);
-
- fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n",
- (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
- (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
- (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
- (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
- (msr & (1 << 15)) ? "" : "UN",
- (unsigned int)msr & 7,
- pkg_cstate_limit_strings[pkg_cstate_limit]);
-
- if (!do_nhm_turbo_ratio_limit)
- return;
+static void
+dump_nhm_turbo_ratio_limits(void)
+{
+ unsigned long long msr;
+ unsigned int ratio;
- get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
+ get_msr(0, MSR_TURBO_RATIO_LIMIT, &msr);
- fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
+ fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
ratio = (msr >> 56) & 0xFF;
if (ratio)
@@ -1202,7 +1292,30 @@ print_nhm_turbo_ratio_limits:
if (ratio)
fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n",
ratio, bclk, ratio * bclk);
+ return;
+}
+
+static void
+dump_nhm_cst_cfg(void)
+{
+ unsigned long long msr;
+
+ get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
+
+#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
+#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
+ fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr);
+
+ fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n",
+ (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
+ (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
+ (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
+ (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
+ (msr & (1 << 15)) ? "" : "UN",
+ (unsigned int)msr & 7,
+ pkg_cstate_limit_strings[pkg_cstate_limit]);
+ return;
}
void free_all_buffers(void)
@@ -1483,7 +1596,8 @@ void check_dev_msr()
struct stat sb;
if (stat("/dev/cpu/0/msr", &sb))
- err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
+ if (system("/sbin/modprobe msr > /dev/null 2>&1"))
+ err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
}
void check_permissions()
@@ -1573,6 +1687,8 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
case 0x47: /* BDW */
case 0x4F: /* BDX */
case 0x56: /* BDX-DE */
+ case 0x4E: /* SKL */
+ case 0x5E: /* SKL */
pkg_cstate_limits = hsw_pkg_cstate_limits;
break;
case 0x37: /* BYT */
@@ -1590,7 +1706,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
}
get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
- pkg_cstate_limit = pkg_cstate_limits[msr & 0x7];
+ pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
return 1;
}
@@ -1615,12 +1731,49 @@ int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
switch (model) {
case 0x3E: /* IVB Xeon */
+ case 0x3F: /* HSW Xeon */
+ return 1;
+ default:
+ return 0;
+ }
+}
+int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+
+ if (family != 6)
+ return 0;
+
+ switch (model) {
+ case 0x3F: /* HSW Xeon */
return 1;
default:
return 0;
}
}
+static void
+dump_cstate_pstate_config_info(family, model)
+{
+ if (!do_nhm_platform_info)
+ return;
+
+ dump_nhm_platform_info();
+
+ if (has_hsw_turbo_ratio_limit(family, model))
+ dump_hsw_turbo_ratio_limits();
+
+ if (has_ivt_turbo_ratio_limit(family, model))
+ dump_ivt_turbo_ratio_limits();
+
+ if (has_nhm_turbo_ratio_limit(family, model))
+ dump_nhm_turbo_ratio_limits();
+
+ dump_nhm_cst_cfg();
+}
+
+
/*
* print_epb()
* Decode the ENERGY_PERF_BIAS MSR
@@ -1690,35 +1843,35 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
fprintf(stderr, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
- (msr & 1 << 0) ? "PROCHOT, " : "",
- (msr & 1 << 1) ? "ThermStatus, " : "",
- (msr & 1 << 2) ? "bit2, " : "",
- (msr & 1 << 4) ? "Graphics, " : "",
- (msr & 1 << 5) ? "Auto-HWP, " : "",
- (msr & 1 << 6) ? "VR-Therm, " : "",
- (msr & 1 << 8) ? "Amps, " : "",
- (msr & 1 << 9) ? "CorePwr, " : "",
- (msr & 1 << 10) ? "PkgPwrL1, " : "",
- (msr & 1 << 11) ? "PkgPwrL2, " : "",
- (msr & 1 << 12) ? "MultiCoreTurbo, " : "",
- (msr & 1 << 13) ? "Transitions, " : "",
+ (msr & 1 << 15) ? "bit15, " : "",
(msr & 1 << 14) ? "bit14, " : "",
- (msr & 1 << 15) ? "bit15, " : "");
+ (msr & 1 << 13) ? "Transitions, " : "",
+ (msr & 1 << 12) ? "MultiCoreTurbo, " : "",
+ (msr & 1 << 11) ? "PkgPwrL2, " : "",
+ (msr & 1 << 10) ? "PkgPwrL1, " : "",
+ (msr & 1 << 9) ? "CorePwr, " : "",
+ (msr & 1 << 8) ? "Amps, " : "",
+ (msr & 1 << 6) ? "VR-Therm, " : "",
+ (msr & 1 << 5) ? "Auto-HWP, " : "",
+ (msr & 1 << 4) ? "Graphics, " : "",
+ (msr & 1 << 2) ? "bit2, " : "",
+ (msr & 1 << 1) ? "ThermStatus, " : "",
+ (msr & 1 << 0) ? "PROCHOT, " : "");
fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
- (msr & 1 << 16) ? "PROCHOT, " : "",
- (msr & 1 << 17) ? "ThermStatus, " : "",
- (msr & 1 << 18) ? "bit18, " : "",
- (msr & 1 << 20) ? "Graphics, " : "",
- (msr & 1 << 21) ? "Auto-HWP, " : "",
- (msr & 1 << 22) ? "VR-Therm, " : "",
- (msr & 1 << 24) ? "Amps, " : "",
- (msr & 1 << 25) ? "CorePwr, " : "",
- (msr & 1 << 26) ? "PkgPwrL1, " : "",
- (msr & 1 << 27) ? "PkgPwrL2, " : "",
- (msr & 1 << 28) ? "MultiCoreTurbo, " : "",
- (msr & 1 << 29) ? "Transitions, " : "",
+ (msr & 1 << 31) ? "bit31, " : "",
(msr & 1 << 30) ? "bit30, " : "",
- (msr & 1 << 31) ? "bit31, " : "");
+ (msr & 1 << 29) ? "Transitions, " : "",
+ (msr & 1 << 28) ? "MultiCoreTurbo, " : "",
+ (msr & 1 << 27) ? "PkgPwrL2, " : "",
+ (msr & 1 << 26) ? "PkgPwrL1, " : "",
+ (msr & 1 << 25) ? "CorePwr, " : "",
+ (msr & 1 << 24) ? "Amps, " : "",
+ (msr & 1 << 22) ? "VR-Therm, " : "",
+ (msr & 1 << 21) ? "Auto-HWP, " : "",
+ (msr & 1 << 20) ? "Graphics, " : "",
+ (msr & 1 << 18) ? "bit18, " : "",
+ (msr & 1 << 17) ? "ThermStatus, " : "",
+ (msr & 1 << 16) ? "PROCHOT, " : "");
}
if (do_gfx_perf_limit_reasons) {
@@ -1784,6 +1937,25 @@ double get_tdp(model)
}
}
+/*
+ * rapl_dram_energy_units_probe()
+ * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
+ */
+static double
+rapl_dram_energy_units_probe(int model, double rapl_energy_units)
+{
+ /* only called for genuine_intel, family 6 */
+
+ switch (model) {
+ case 0x3F: /* HSX */
+ case 0x4F: /* BDX */
+ case 0x56: /* BDX-DE */
+ return (rapl_dram_energy_units = 15.3 / 1000000);
+ default:
+ return (rapl_energy_units);
+ }
+}
+
/*
* rapl_probe()
@@ -1812,14 +1984,18 @@ void rapl_probe(unsigned int family, unsigned int model)
case 0x47: /* BDW */
do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
break;
+ case 0x4E: /* SKL */
+ case 0x5E: /* SKL */
+ do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
+ break;
case 0x3F: /* HSX */
case 0x4F: /* BDX */
case 0x56: /* BDX-DE */
- do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
+ do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
break;
case 0x2D:
case 0x3E:
- do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
+ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
break;
case 0x37: /* BYT */
case 0x4D: /* AVN */
@@ -1839,6 +2015,8 @@ void rapl_probe(unsigned int family, unsigned int model)
else
rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
+ rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
+
time_unit = msr >> 16 & 0xF;
if (time_unit == 0)
time_unit = 0xA;
@@ -2009,19 +2187,18 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
((msr >> 48) & 1) ? "EN" : "DIS");
}
- if (do_rapl & RAPL_DRAM) {
+ if (do_rapl & RAPL_DRAM_POWER_INFO) {
if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
return -6;
-
fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
cpu, msr,
((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
-
-
+ }
+ if (do_rapl & RAPL_DRAM) {
if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
return -9;
fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
@@ -2090,6 +2267,8 @@ int has_snb_msrs(unsigned int family, unsigned int model)
case 0x47: /* BDW */
case 0x4F: /* BDX */
case 0x56: /* BDX-DE */
+ case 0x4E: /* SKL */
+ case 0x5E: /* SKL */
return 1;
}
return 0;
@@ -2110,11 +2289,35 @@ int has_hsw_msrs(unsigned int family, unsigned int model)
switch (model) {
case 0x45: /* HSW */
case 0x3D: /* BDW */
+ case 0x4E: /* SKL */
+ case 0x5E: /* SKL */
return 1;
}
return 0;
}
+/*
+ * SKL adds support for additional MSRS:
+ *
+ * MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658
+ * MSR_PKG_ANY_CORE_C0_RES 0x00000659
+ * MSR_PKG_ANY_GFXE_C0_RES 0x0000065A
+ * MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B
+ */
+int has_skl_msrs(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+
+ switch (model) {
+ case 0x4E: /* SKL */
+ case 0x5E: /* SKL */
+ return 1;
+ }
+ return 0;
+}
+
+
int is_slm(unsigned int family, unsigned int model)
{
@@ -2228,7 +2431,7 @@ guess:
return 0;
}
-void check_cpuid()
+void process_cpuid()
{
unsigned int eax, ebx, ecx, edx, max_level;
unsigned int fms, family, model, stepping;
@@ -2294,6 +2497,41 @@ void check_cpuid()
do_ptm ? "" : "No ",
has_epb ? "" : "No ");
+ if (max_level > 0x15) {
+ unsigned int eax_crystal;
+ unsigned int ebx_tsc;
+
+ /*
+ * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
+ */
+ eax_crystal = ebx_tsc = crystal_hz = edx = 0;
+ __get_cpuid(0x15, &eax_crystal, &ebx_tsc, &crystal_hz, &edx);
+
+ if (ebx_tsc != 0) {
+
+ if (debug && (ebx != 0))
+ fprintf(stderr, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
+ eax_crystal, ebx_tsc, crystal_hz);
+
+ if (crystal_hz == 0)
+ switch(model) {
+ case 0x4E: /* SKL */
+ case 0x5E: /* SKL */
+ crystal_hz = 24000000; /* 24 MHz */
+ break;
+ default:
+ crystal_hz = 0;
+ }
+
+ if (crystal_hz) {
+ tsc_hz = (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
+ if (debug)
+ fprintf(stderr, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
+ tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal);
+ }
+ }
+ }
+
do_nhm_platform_info = do_nhm_cstates = do_smi = probe_nhm_msrs(family, model);
do_snb_cstates = has_snb_msrs(family, model);
do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2);
@@ -2301,18 +2539,19 @@ void check_cpuid()
do_pc6 = (pkg_cstate_limit >= PCL__6);
do_pc7 = do_snb_cstates && (pkg_cstate_limit >= PCL__7);
do_c8_c9_c10 = has_hsw_msrs(family, model);
+ do_skl_residency = has_skl_msrs(family, model);
do_slm_cstates = is_slm(family, model);
bclk = discover_bclk(family, model);
- do_nhm_turbo_ratio_limit = do_nhm_platform_info && has_nhm_turbo_ratio_limit(family, model);
- do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model);
rapl_probe(family, model);
perf_limit_reasons_probe(family, model);
+ if (debug)
+ dump_cstate_pstate_config_info();
+
return;
}
-
void help()
{
fprintf(stderr,
@@ -2428,14 +2667,14 @@ void topology_probe()
if (debug > 1)
fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n",
max_core_id, topo.num_cores_per_pkg);
- if (!summary_only && topo.num_cores_per_pkg > 1)
+ if (debug && !summary_only && topo.num_cores_per_pkg > 1)
show_core = 1;
topo.num_packages = max_package_id + 1;
if (debug > 1)
fprintf(stderr, "max_package_id %d, sizing for %d packages\n",
max_package_id, topo.num_packages);
- if (!summary_only && topo.num_packages > 1)
+ if (debug && !summary_only && topo.num_packages > 1)
show_pkg = 1;
topo.num_threads_per_core = max_siblings;
@@ -2550,14 +2789,11 @@ void turbostat_init()
{
check_dev_msr();
check_permissions();
- check_cpuid();
+ process_cpuid();
setup_all_buffers();
if (debug)
- print_verbose_header();
-
- if (debug)
for_all_cpus(print_epb, ODD_COUNTERS);
if (debug)
@@ -2634,7 +2870,7 @@ int get_and_dump_counters(void)
}
void print_version() {
- fprintf(stderr, "turbostat version 4.1 10-Feb, 2015"
+ fprintf(stderr, "turbostat version 4.5 2 Apr, 2015"
" - Len Brown <lenb@kernel.org>\n");
}
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index f0a7918178dd..ddf63569df5a 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -1,6 +1,6 @@
.PHONY: all all_32 all_64 check_build32 clean run_tests
-TARGETS_C_BOTHBITS := sigreturn
+TARGETS_C_BOTHBITS := sigreturn single_step_syscall
BINARIES_32 := $(TARGETS_C_BOTHBITS:%=%_32)
BINARIES_64 := $(TARGETS_C_BOTHBITS:%=%_64)
diff --git a/tools/testing/selftests/x86/run_x86_tests.sh b/tools/testing/selftests/x86/run_x86_tests.sh
index 3d3ec65f3e7c..3fc19b376812 100644
--- a/tools/testing/selftests/x86/run_x86_tests.sh
+++ b/tools/testing/selftests/x86/run_x86_tests.sh
@@ -3,9 +3,11 @@
# This is deliberately minimal. IMO kselftests should provide a standard
# script here.
./sigreturn_32 || exit 1
+./single_step_syscall_32 || exit 1
if [[ "$uname -p" -eq "x86_64" ]]; then
./sigreturn_64 || exit 1
+ ./single_step_syscall_64 || exit 1
fi
exit 0
diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c
new file mode 100644
index 000000000000..50c26358e8b7
--- /dev/null
+++ b/tools/testing/selftests/x86/single_step_syscall.c
@@ -0,0 +1,181 @@
+/*
+ * single_step_syscall.c - single-steps various x86 syscalls
+ * Copyright (c) 2014-2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * This is a very simple series of tests that makes system calls with
+ * the TF flag set. This exercises some nasty kernel code in the
+ * SYSENTER case: SYSENTER does not clear TF, so SYSENTER with TF set
+ * immediately issues #DB from CPL 0. This requires special handling in
+ * the kernel.
+ */
+
+#define _GNU_SOURCE
+
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <sys/signal.h>
+#include <sys/ucontext.h>
+#include <asm/ldt.h>
+#include <err.h>
+#include <setjmp.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <sys/ptrace.h>
+#include <sys/user.h>
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static volatile sig_atomic_t sig_traps;
+
+#ifdef __x86_64__
+# define REG_IP REG_RIP
+# define WIDTH "q"
+#else
+# define REG_IP REG_EIP
+# define WIDTH "l"
+#endif
+
+static unsigned long get_eflags(void)
+{
+ unsigned long eflags;
+ asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
+ return eflags;
+}
+
+static void set_eflags(unsigned long eflags)
+{
+ asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
+ : : "rm" (eflags) : "flags");
+}
+
+#define X86_EFLAGS_TF (1UL << 8)
+
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ if (get_eflags() & X86_EFLAGS_TF) {
+ set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+ printf("[WARN]\tSIGTRAP handler had TF set\n");
+ _exit(1);
+ }
+
+ sig_traps++;
+
+ if (sig_traps == 10000 || sig_traps == 10001) {
+ printf("[WARN]\tHit %d SIGTRAPs with si_addr 0x%lx, ip 0x%lx\n",
+ (int)sig_traps,
+ (unsigned long)info->si_addr,
+ (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
+ }
+}
+
+static void check_result(void)
+{
+ unsigned long new_eflags = get_eflags();
+ set_eflags(new_eflags & ~X86_EFLAGS_TF);
+
+ if (!sig_traps) {
+ printf("[FAIL]\tNo SIGTRAP\n");
+ exit(1);
+ }
+
+ if (!(new_eflags & X86_EFLAGS_TF)) {
+ printf("[FAIL]\tTF was cleared\n");
+ exit(1);
+ }
+
+ printf("[OK]\tSurvived with TF set and %d traps\n", (int)sig_traps);
+ sig_traps = 0;
+}
+
+int main()
+{
+ int tmp;
+
+ sethandler(SIGTRAP, sigtrap, 0);
+
+ printf("[RUN]\tSet TF and check nop\n");
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ asm volatile ("nop");
+ check_result();
+
+#ifdef __x86_64__
+ printf("[RUN]\tSet TF and check syscall-less opportunistic sysret\n");
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ extern unsigned char post_nop[];
+ asm volatile ("pushf" WIDTH "\n\t"
+ "pop" WIDTH " %%r11\n\t"
+ "nop\n\t"
+ "post_nop:"
+ : : "c" (post_nop) : "r11");
+ check_result();
+#endif
+
+ printf("[RUN]\tSet TF and check int80\n");
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid));
+ check_result();
+
+ /*
+ * This test is particularly interesting if fast syscalls use
+ * SYSENTER: it triggers a nasty design flaw in SYSENTER.
+ * Specifically, SYSENTER does not clear TF, so either SYSENTER
+ * or the next instruction traps at CPL0. (Of course, Intel
+ * mostly forgot to document exactly what happens here.) So we
+ * get a CPL0 fault with usergs (on 64-bit kernels) and possibly
+ * no stack. The only sane way the kernel can possibly handle
+ * it is to clear TF on return from the #DB handler, but this
+ * happens way too early to set TF in the saved pt_regs, so the
+ * kernel has to do something clever to avoid losing track of
+ * the TF bit.
+ *
+ * Needless to say, we've had bugs in this area.
+ */
+ syscall(SYS_getpid); /* Force symbol binding without TF set. */
+ printf("[RUN]\tSet TF and check a fast syscall\n");
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ syscall(SYS_getpid);
+ check_result();
+
+ /* Now make sure that another fast syscall doesn't set TF again. */
+ printf("[RUN]\tFast syscall with TF cleared\n");
+ fflush(stdout); /* Force a syscall */
+ if (get_eflags() & X86_EFLAGS_TF) {
+ printf("[FAIL]\tTF is now set\n");
+ exit(1);
+ }
+ if (sig_traps) {
+ printf("[FAIL]\tGot SIGTRAP\n");
+ exit(1);
+ }
+ printf("[OK]\tNothing unexpected happened\n");
+
+ return 0;
+}