Diffstat (limited to 'tools/perf/builtin-stat.c')
 tools/perf/builtin-stat.c | 250 ++++++++++++++++++++++++++++++++++------------
 1 file changed, 180 insertions(+), 70 deletions(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 265b05157972..bf640abc9c41 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -93,6 +93,7 @@
#include <linux/ctype.h>
#include <perf/evlist.h>
+#include <internal/threadmap.h>
#define DEFAULT_SEPARATOR " "
#define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi"
@@ -173,14 +174,13 @@ static struct target target = {
#define METRIC_ONLY_LEN 20
-static volatile pid_t child_pid = -1;
+static volatile sig_atomic_t child_pid = -1;
static int detailed_run = 0;
static bool transaction_run;
static bool topdown_run = false;
static bool smi_cost = false;
static bool smi_reset = false;
static int big_num_opt = -1;
-static bool group = false;
static const char *pre_cmd = NULL;
static const char *post_cmd = NULL;
static bool sync_run = false;
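Reviewer note: the volatile int -> volatile sig_atomic_t conversions here (child_pid) and below (done, workload_exec_errno, signr) matter because these variables are written from signal handlers; sig_atomic_t is the only integer type the C standard guarantees is read and written atomically with respect to asynchronous signal delivery. A minimal standalone sketch of the pattern, independent of the perf code:

#include <signal.h>
#include <unistd.h>

/* Flag shared between the main flow and an async signal handler. */
static volatile sig_atomic_t done;

static void handle_sigint(int sig)
{
	(void)sig;
	done = 1;	/* safe: a single atomic store, no library calls */
}

int main(void)
{
	signal(SIGINT, handle_sigint);
	while (!done)
		pause();	/* sleep until a signal arrives */
	return 0;
}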
@@ -208,7 +208,7 @@ struct perf_stat {
static struct perf_stat perf_stat;
#define STAT_RECORD perf_stat.record
-static volatile int done = 0;
+static volatile sig_atomic_t done = 0;
static struct perf_stat_config stat_config = {
.aggr_mode = AGGR_GLOBAL,
@@ -465,15 +465,19 @@ static int read_bpf_map_counters(void)
return 0;
}
-static void read_counters(struct timespec *rs)
+static int read_counters(struct timespec *rs)
{
- struct evsel *counter;
-
if (!stat_config.stop_read_counter) {
if (read_bpf_map_counters() ||
read_affinity_counters(rs))
- return;
+ return -1;
}
+ return 0;
+}
+
+static void process_counters(void)
+{
+ struct evsel *counter;
evlist__for_each_entry(evsel_list, counter) {
if (counter->err)
@@ -482,6 +486,10 @@ static void read_counters(struct timespec *rs)
pr_warning("failed to process counter %s\n", counter->name);
counter->err = 0;
}
+
+ perf_stat_merge_counters(&stat_config, evsel_list);
+ perf_stat_process_percore(&stat_config, evsel_list);
+ perf_stat_process_shadow_stats(&stat_config, evsel_list);
}
static void process_interval(void)
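Reviewer note: read_counters() used to read and process in one pass, silently returning on the first read failure. Splitting it into read_counters() (returns 0/-1) and process_counters() (merge, per-core, shadow stats) lets each caller gate processing on the read result; process_interval() below additionally resets the aggregated stats before each round. A toy sketch of the resulting two-phase flow, with stand-in helpers rather than the real perf API:

#include <stdio.h>
#include <time.h>

/* Hypothetical stand-ins for the real perf helpers, for illustration only. */
static int read_counters(struct timespec *rs)
{
	(void)rs;
	return 0;	/* pretend every counter read succeeded */
}

static void process_counters(void)
{
	/* in perf: merge counters, per-core processing, shadow stats */
	puts("processing counters");
}

int main(void)
{
	struct timespec rs = { .tv_sec = 1 };

	/* Processing now runs only if all reads succeeded. */
	if (read_counters(&rs) == 0)
		process_counters();
	return 0;
}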
@@ -492,7 +500,10 @@ static void process_interval(void)
diff_timespec(&rs, &ts, &ref_time);
perf_stat__reset_shadow_per_stat(&rt_stat);
- read_counters(&rs);
+ evlist__reset_aggr_stats(evsel_list);
+
+ if (read_counters(&rs) == 0)
+ process_counters();
if (STAT_RECORD) {
if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL))
@@ -528,26 +539,14 @@ static int enable_counters(void)
return err;
}
- if (stat_config.initial_delay < 0) {
- pr_info(EVLIST_DISABLED_MSG);
- return 0;
- }
-
- if (stat_config.initial_delay > 0) {
- pr_info(EVLIST_DISABLED_MSG);
- usleep(stat_config.initial_delay * USEC_PER_MSEC);
- }
-
/*
* We need to enable counters only if:
* - we don't have tracee (attaching to task or cpu)
* - we have initial delay configured
*/
- if (!target__none(&target) || stat_config.initial_delay) {
+ if (!target__none(&target)) {
if (!all_counters_use_bpf)
evlist__enable(evsel_list);
- if (stat_config.initial_delay > 0)
- pr_info(EVLIST_ENABLED_MSG);
}
return 0;
}
@@ -569,7 +568,7 @@ static void disable_counters(void)
}
}
-static volatile int workload_exec_errno;
+static volatile sig_atomic_t workload_exec_errno;
/*
* evlist__prepare_workload will send a SIGUSR1
@@ -769,9 +768,6 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
child_pid = evsel_list->workload.pid;
}
- if (group)
- evlist__set_leader(evsel_list);
-
if (!cpu_map__is_dummy(evsel_list->core.user_requested_cpus)) {
if (affinity__setup(&saved_affinity) < 0)
return -1;
@@ -918,14 +914,27 @@ try_again_reset:
return err;
}
- err = enable_counters();
- if (err)
- return -1;
+ if (stat_config.initial_delay) {
+ pr_info(EVLIST_DISABLED_MSG);
+ } else {
+ err = enable_counters();
+ if (err)
+ return -1;
+ }
/* Exec the command, if any */
if (forks)
evlist__start_workload(evsel_list);
+ if (stat_config.initial_delay > 0) {
+ usleep(stat_config.initial_delay * USEC_PER_MSEC);
+ err = enable_counters();
+ if (err)
+ return -1;
+
+ pr_info(EVLIST_ENABLED_MSG);
+ }
+
t0 = rdclock();
clock_gettime(CLOCK_MONOTONIC, &ref_time);
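Reviewer note: this hunk pairs with the enable_counters() one above. Previously, a positive --delay slept inside enable_counters() before the workload had been started, so the delay postponed the fork rather than skipping the first milliseconds of the workload's execution. The new sequence is: enable immediately (or just print the disabled message when a delay is set), start the workload, then sleep and enable. A condensed, stubbed sketch of the new ordering, not the real perf code:

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

#define USEC_PER_MSEC 1000

static int  initial_delay = 100;	/* --delay in ms; < 0 leaves enabling to the user */
static bool forks = true;

static int  enable_counters(void) { puts("counters enabled"); return 0; }
static void start_workload(void)  { puts("workload started");           }

int main(void)
{
	if (initial_delay) {
		puts("events disabled");	/* EVLIST_DISABLED_MSG */
	} else if (enable_counters()) {
		return -1;
	}

	if (forks)
		start_workload();	/* the workload now runs during the delay */

	if (initial_delay > 0) {
		usleep(initial_delay * USEC_PER_MSEC);
		if (enable_counters())
			return -1;
		puts("events enabled");	/* EVLIST_ENABLED_MSG */
	}
	return 0;
}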
@@ -963,11 +972,9 @@ try_again_reset:
init_stats(&walltime_nsecs_stats);
update_stats(&walltime_nsecs_stats, t1 - t0);
- if (stat_config.aggr_mode == AGGR_GLOBAL)
- evlist__save_aggr_prev_raw_counts(evsel_list);
-
evlist__copy_prev_raw_counts(evsel_list);
evlist__reset_prev_raw_counts(evsel_list);
+ evlist__reset_aggr_stats(evsel_list);
perf_stat__reset_shadow_per_stat(&rt_stat);
} else {
update_stats(&walltime_nsecs_stats, t1 - t0);
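Reviewer note: the AGGR_GLOBAL-only snapshot of aggregated prev counts can go away because aggregated stats are now kept per aggregation slot and reset each interval via evlist__reset_aggr_stats(), so no mode needs a special-cased save any more.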
@@ -980,7 +987,8 @@ try_again_reset:
* avoid arbitrary skew, we must read all counters before closing any
* group leaders.
*/
- read_counters(&(struct timespec) { .tv_nsec = t1-t0 });
+ if (read_counters(&(struct timespec) { .tv_nsec = t1-t0 }) == 0)
+ process_counters();
/*
* We need to keep evsel_list alive, because it's processed
@@ -1023,13 +1031,13 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
/* Do not print anything if we record to the pipe. */
if (STAT_RECORD && perf_stat.data.is_pipe)
return;
- if (stat_config.quiet)
+ if (quiet)
return;
evlist__print_counters(evsel_list, &stat_config, &target, ts, argc, argv);
}
-static volatile int signr = -1;
+static volatile sig_atomic_t signr = -1;
static void skip_signal(int signo)
{
@@ -1181,8 +1189,6 @@ static struct option stat_options[] = {
#endif
OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
"system-wide collection from all CPUs"),
- OPT_BOOLEAN('g', "group", &group,
- "put the counters into a counter group"),
OPT_BOOLEAN(0, "scale", &stat_config.scale,
"Use --no-scale to disable counter scaling for multiplexing"),
OPT_INCR('v', "verbose", &verbose,
@@ -1273,8 +1279,8 @@ static struct option stat_options[] = {
"print summary for interval mode"),
OPT_BOOLEAN(0, "no-csv-summary", &stat_config.no_csv_summary,
"don't print 'summary' for CSV summary output"),
- OPT_BOOLEAN(0, "quiet", &stat_config.quiet,
- "don't print output (useful with record)"),
+ OPT_BOOLEAN(0, "quiet", &quiet,
+ "don't print any output, messages or warnings (useful with record)"),
OPT_CALLBACK(0, "cputype", &evsel_list, "hybrid cpu type",
"Only enable events on applying cpu with this type "
"for hybrid platform (e.g. core or atom)",
@@ -1330,10 +1336,26 @@ static struct aggr_cpu_id perf_stat__get_node(struct perf_stat_config *config __
return aggr_cpu_id__node(cpu, /*data=*/NULL);
}
+static struct aggr_cpu_id perf_stat__get_global(struct perf_stat_config *config __maybe_unused,
+ struct perf_cpu cpu)
+{
+ return aggr_cpu_id__global(cpu, /*data=*/NULL);
+}
+
+static struct aggr_cpu_id perf_stat__get_cpu(struct perf_stat_config *config __maybe_unused,
+ struct perf_cpu cpu)
+{
+ return aggr_cpu_id__cpu(cpu, /*data=*/NULL);
+}
+
static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config,
aggr_get_id_t get_id, struct perf_cpu cpu)
{
- struct aggr_cpu_id id = aggr_cpu_id__empty();
+ struct aggr_cpu_id id;
+
+ /* per-process mode - should use global aggr mode */
+ if (cpu.cpu == -1)
+ return get_id(config, cpu);
if (aggr_cpu_id__is_empty(&config->cpus_aggr_map->map[cpu.cpu]))
config->cpus_aggr_map->map[cpu.cpu] = get_id(config, cpu);
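Reviewer note: perf_stat__get_aggr() memoizes per-CPU ids in cpus_aggr_map, which is indexed by cpu.cpu and therefore has no slot for the "no CPU" case; the new early return sends cpu.cpu == -1 (per-process/task profiling) straight to the getter, such as the new perf_stat__get_global()/perf_stat__get_cpu() above. A toy model of the memoization with invented types, not the perf structs:

#include <stdio.h>

struct id { int val; };			/* plays aggr_cpu_id   */
typedef struct id (*get_id_fn)(int cpu);

#define NR_CPUS 8
static struct id cache[NR_CPUS];	/* plays cpus_aggr_map */

static struct id get_cached(get_id_fn get_id, int cpu)
{
	/* Per-process mode: no CPU, hence no cache slot - resolve directly. */
	if (cpu == -1)
		return get_id(cpu);

	if (cache[cpu].val == 0)	/* 0 doubles as "empty" in this toy */
		cache[cpu] = get_id(cpu);
	return cache[cpu];
}

static struct id get_socket(int cpu)
{
	return (struct id){ .val = cpu / 4 + 1 };	/* fake socket id */
}

int main(void)
{
	printf("cpu 5 -> %d\n", get_cached(get_socket, 5).val);		/* fills cache[5] */
	printf("cpu -1 -> %d\n", get_cached(get_socket, -1).val);	/* bypasses cache */
	return 0;
}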
@@ -1366,16 +1388,16 @@ static struct aggr_cpu_id perf_stat__get_node_cached(struct perf_stat_config *co
return perf_stat__get_aggr(config, perf_stat__get_node, cpu);
}
-static bool term_percore_set(void)
+static struct aggr_cpu_id perf_stat__get_global_cached(struct perf_stat_config *config,
+ struct perf_cpu cpu)
{
- struct evsel *counter;
-
- evlist__for_each_entry(evsel_list, counter) {
- if (counter->percore)
- return true;
- }
+ return perf_stat__get_aggr(config, perf_stat__get_global, cpu);
+}
- return false;
+static struct aggr_cpu_id perf_stat__get_cpu_cached(struct perf_stat_config *config,
+ struct perf_cpu cpu)
+{
+ return perf_stat__get_aggr(config, perf_stat__get_cpu, cpu);
}
static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode)
@@ -1390,11 +1412,9 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode)
case AGGR_NODE:
return aggr_cpu_id__node;
case AGGR_NONE:
- if (term_percore_set())
- return aggr_cpu_id__core;
-
- return NULL;
+ return aggr_cpu_id__cpu;
case AGGR_GLOBAL:
+ return aggr_cpu_id__global;
case AGGR_THREAD:
case AGGR_UNSET:
case AGGR_MAX:
@@ -1415,11 +1435,9 @@ static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode)
case AGGR_NODE:
return perf_stat__get_node_cached;
case AGGR_NONE:
- if (term_percore_set()) {
- return perf_stat__get_core_cached;
- }
- return NULL;
+ return perf_stat__get_cpu_cached;
case AGGR_GLOBAL:
+ return perf_stat__get_global_cached;
case AGGR_THREAD:
case AGGR_UNSET:
case AGGR_MAX:
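Reviewer note: after these two switches, AGGR_NONE and AGGR_GLOBAL return real getters instead of NULL. AGGR_NONE no longer needs term_percore_set() to probe the event list for per-core terms, since the per-CPU id now carries socket/die/core anyway, and AGGR_GLOBAL gets an explicit getter so a one-slot aggregation map can be built for it like for every other CPU-based mode. Only AGGR_THREAD (handled by the thread map below), AGGR_UNSET and AGGR_MAX still return NULL.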
@@ -1434,8 +1452,9 @@ static int perf_stat_init_aggr_mode(void)
aggr_cpu_id_get_t get_id = aggr_mode__get_aggr(stat_config.aggr_mode);
if (get_id) {
+ bool needs_sort = stat_config.aggr_mode != AGGR_NONE;
stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus,
- get_id, /*data=*/NULL);
+ get_id, /*data=*/NULL, needs_sort);
if (!stat_config.aggr_map) {
pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]);
return -1;
@@ -1443,6 +1462,21 @@ static int perf_stat_init_aggr_mode(void)
stat_config.aggr_get_id = aggr_mode__get_id(stat_config.aggr_mode);
}
+ if (stat_config.aggr_mode == AGGR_THREAD) {
+ nr = perf_thread_map__nr(evsel_list->core.threads);
+ stat_config.aggr_map = cpu_aggr_map__empty_new(nr);
+ if (stat_config.aggr_map == NULL)
+ return -ENOMEM;
+
+ for (int s = 0; s < nr; s++) {
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
+
+ id.thread_idx = s;
+ stat_config.aggr_map->map[s] = id;
+ }
+ return 0;
+ }
+
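Reviewer note: AGGR_THREAD cannot derive its aggregation map from CPU topology, so the map is simply one slot per thread-map entry keyed by thread_idx (the same loop is repeated in perf_stat_init_aggr_mode_file() further down). A generic sketch of building such an identity index map, with invented types rather than the real cpu_aggr_map:

#include <stdio.h>
#include <stdlib.h>

struct aggr_id  { int thread_idx; };
struct aggr_map { int nr; struct aggr_id map[]; };	/* flexible array member */

/* Slot s aggregates all counts read for thread-map entry s. */
static struct aggr_map *thread_aggr_map__new(int nr_threads)
{
	struct aggr_map *m = malloc(sizeof(*m) + nr_threads * sizeof(m->map[0]));

	if (!m)
		return NULL;
	m->nr = nr_threads;
	for (int s = 0; s < nr_threads; s++)
		m->map[s].thread_idx = s;
	return m;
}

int main(void)
{
	struct aggr_map *m = thread_aggr_map__new(4);

	if (!m)
		return 1;
	printf("slot 2 -> thread_idx %d\n", m->map[2].thread_idx);
	free(m);
	return 0;
}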
/*
* The evsel_list->cpus is the base we operate on,
* taking the highest cpu number to be the size of
@@ -1527,6 +1561,26 @@ static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(struct perf_cpu cpu, vo
return id;
}
+static struct aggr_cpu_id perf_env__get_cpu_aggr_by_cpu(struct perf_cpu cpu, void *data)
+{
+ struct perf_env *env = data;
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
+
+ if (cpu.cpu != -1) {
+ /*
+	 * core_id is relative to its socket and die, but we need a
+	 * globally unique id, so set the socket, die and core ids,
+	 * plus the cpu itself.
+ */
+ id.socket = env->cpu[cpu.cpu].socket_id;
+ id.die = env->cpu[cpu.cpu].die_id;
+ id.core = env->cpu[cpu.cpu].core_id;
+ id.cpu = cpu;
+ }
+
+ return id;
+}
+
static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(struct perf_cpu cpu, void *data)
{
struct aggr_cpu_id id = aggr_cpu_id__empty();
@@ -1535,6 +1589,16 @@ static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(struct perf_cpu cpu, vo
return id;
}
+static struct aggr_cpu_id perf_env__get_global_aggr_by_cpu(struct perf_cpu cpu __maybe_unused,
+ void *data __maybe_unused)
+{
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
+
+	/* every cpu maps to the same id, so everything aggregates to cpu 0 */
+ id.cpu = (struct perf_cpu){ .cpu = 0 };
+ return id;
+}
+
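Reviewer note: mapping every CPU to the same id (cpu 0) is what makes AGGR_GLOBAL collapse to a single aggregation slot: cpu_aggr_map__new() keeps only unique ids, so one entry remains and all counts merge into it.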
static struct aggr_cpu_id perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused,
struct perf_cpu cpu)
{
@@ -1552,12 +1616,24 @@ static struct aggr_cpu_id perf_stat__get_core_file(struct perf_stat_config *conf
return perf_env__get_core_aggr_by_cpu(cpu, &perf_stat.session->header.env);
}
+static struct aggr_cpu_id perf_stat__get_cpu_file(struct perf_stat_config *config __maybe_unused,
+ struct perf_cpu cpu)
+{
+ return perf_env__get_cpu_aggr_by_cpu(cpu, &perf_stat.session->header.env);
+}
+
static struct aggr_cpu_id perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused,
struct perf_cpu cpu)
{
return perf_env__get_node_aggr_by_cpu(cpu, &perf_stat.session->header.env);
}
+static struct aggr_cpu_id perf_stat__get_global_file(struct perf_stat_config *config __maybe_unused,
+ struct perf_cpu cpu)
+{
+ return perf_env__get_global_aggr_by_cpu(cpu, &perf_stat.session->header.env);
+}
+
static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode)
{
switch (aggr_mode) {
@@ -1569,8 +1645,10 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode)
return perf_env__get_core_aggr_by_cpu;
case AGGR_NODE:
return perf_env__get_node_aggr_by_cpu;
- case AGGR_NONE:
case AGGR_GLOBAL:
+ return perf_env__get_global_aggr_by_cpu;
+ case AGGR_NONE:
+ return perf_env__get_cpu_aggr_by_cpu;
case AGGR_THREAD:
case AGGR_UNSET:
case AGGR_MAX:
@@ -1590,8 +1668,10 @@ static aggr_get_id_t aggr_mode__get_id_file(enum aggr_mode aggr_mode)
return perf_stat__get_core_file;
case AGGR_NODE:
return perf_stat__get_node_file;
- case AGGR_NONE:
case AGGR_GLOBAL:
+ return perf_stat__get_global_file;
+ case AGGR_NONE:
+ return perf_stat__get_cpu_file;
case AGGR_THREAD:
case AGGR_UNSET:
case AGGR_MAX:
@@ -1604,11 +1684,29 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
{
struct perf_env *env = &st->session->header.env;
aggr_cpu_id_get_t get_id = aggr_mode__get_aggr_file(stat_config.aggr_mode);
+ bool needs_sort = stat_config.aggr_mode != AGGR_NONE;
+
+ if (stat_config.aggr_mode == AGGR_THREAD) {
+ int nr = perf_thread_map__nr(evsel_list->core.threads);
+
+ stat_config.aggr_map = cpu_aggr_map__empty_new(nr);
+ if (stat_config.aggr_map == NULL)
+ return -ENOMEM;
+
+ for (int s = 0; s < nr; s++) {
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
+
+ id.thread_idx = s;
+ stat_config.aggr_map->map[s] = id;
+ }
+ return 0;
+ }
if (!get_id)
return 0;
- stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus, get_id, env);
+ stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus,
+ get_id, env, needs_sort);
if (!stat_config.aggr_map) {
pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]);
return -1;
@@ -1991,13 +2089,11 @@ static int process_stat_round_event(struct perf_session *session,
union perf_event *event)
{
struct perf_record_stat_round *stat_round = &event->stat_round;
- struct evsel *counter;
struct timespec tsh, *ts = NULL;
const char **argv = session->header.env.cmdline_argv;
int argc = session->header.env.nr_cmdline;
- evlist__for_each_entry(evsel_list, counter)
- perf_stat_process_counter(&stat_config, counter);
+ process_counters();
if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL)
update_stats(&walltime_nsecs_stats, stat_round->time);
@@ -2024,17 +2120,23 @@ int process_stat_config_event(struct perf_session *session,
if (perf_cpu_map__empty(st->cpus)) {
if (st->aggr_mode != AGGR_UNSET)
pr_warning("warning: processing task data, aggregation mode not set\n");
- return 0;
- }
-
- if (st->aggr_mode != AGGR_UNSET)
+ } else if (st->aggr_mode != AGGR_UNSET) {
stat_config.aggr_mode = st->aggr_mode;
+ }
if (perf_stat.data.is_pipe)
perf_stat_init_aggr_mode();
else
perf_stat_init_aggr_mode_file(st);
+ if (stat_config.aggr_map) {
+ int nr_aggr = stat_config.aggr_map->nr;
+
+ if (evlist__alloc_aggr_stats(session->evlist, nr_aggr) < 0) {
+ pr_err("cannot allocate aggr counts\n");
+ return -1;
+ }
+ }
return 0;
}
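Reviewer note: report mode now also allocates per-aggregate counts, sized by aggr_map->nr, as soon as the aggregation mode is known; this mirrors the cmd_stat() reordering at the end of the patch, where perf_stat_init_aggr_mode() must run before evlist__alloc_stats().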
@@ -2048,7 +2150,7 @@ static int set_maps(struct perf_stat *st)
perf_evlist__set_maps(&evsel_list->core, st->cpus, st->threads);
- if (evlist__alloc_stats(evsel_list, true))
+ if (evlist__alloc_stats(&stat_config, evsel_list, /*alloc_raw=*/true))
return -ENOMEM;
st->maps_allocated = true;
@@ -2277,7 +2379,7 @@ int cmd_stat(int argc, const char **argv)
goto out;
}
- if (!output && !stat_config.quiet) {
+ if (!output && !quiet) {
struct timespec tm;
mode = append_file ? "a" : "w";
@@ -2297,6 +2399,14 @@ int cmd_stat(int argc, const char **argv)
}
}
+ if (stat_config.interval_clear && !isatty(fileno(output))) {
+ fprintf(stderr, "--interval-clear does not work with output\n");
+ parse_options_usage(stat_usage, stat_options, "o", 1);
+ parse_options_usage(NULL, stat_options, "log-fd", 0);
+ parse_options_usage(NULL, stat_options, "interval-clear", 0);
+ return -1;
+ }
+
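Reviewer note: clearing the screen only makes sense on a terminal, so --interval-clear is now rejected up front whenever output is redirected; e.g. perf stat -I 1000 --interval-clear -o counts.txt now fails with the message above instead of emitting terminal clear sequences into counts.txt.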
stat_config.output = output;
/*
@@ -2495,10 +2605,10 @@ int cmd_stat(int argc, const char **argv)
goto out;
}
- if (evlist__alloc_stats(evsel_list, interval))
+ if (perf_stat_init_aggr_mode())
goto out;
- if (perf_stat_init_aggr_mode())
+ if (evlist__alloc_stats(&stat_config, evsel_list, interval))
goto out;
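Reviewer note: the swap is needed because evlist__alloc_stats() now takes stat_config and sizes the per-aggregate counts from stat_config.aggr_map, so the aggregation mode has to be initialized before the stats are allocated.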
/*