From 8719138318316656988dbd422461c1addc9a9159 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 20 Oct 2015 00:23:48 +0900 Subject: perf test: Silence tracepoint event failures Currently, when 'perf test' is run by a normal user, it'll fail to access tracepoint events. The output becomes somewhat messy because it tries to be nice with long error messages and hints. IMHO this is not needed for 'perf test' by default and AFAIK 'perf test' uses pr_debug() rather than pr_err() for such messages so that one can use -v option to see further details on failed testcases if needed. Before: $ perf test 1: vmlinux symtab matches kallsyms : FAILED! 2: detect openat syscall event :Error: No permissions to read /sys/kernel/debug/tracing/events/syscalls/sys_enter_openat Hint: Try 'sudo mount -o remount,mode=755 /sys/kernel/debug/tracing' FAILED! 3: detect openat syscall event on all cpus :Error: No permissions to read /sys/kernel/debug/tracing/events/syscalls/sys_enter_openat Hint: Try 'sudo mount -o remount,mode=755 /sys/kernel/debug/tracing' FAILED! ... After: $ perf test 1: vmlinux symtab matches kallsyms : FAILED! 2: detect openat syscall event : FAILED! 3: detect openat syscall event on all cpus : FAILED! ... $ perf test -v 2 2: detect openat syscall event : --- start --- test child forked, pid 30575 Error: No permissions to read /sys/kernel/debug/tracing/events/syscalls/sys_enter_openat Hint: Try 'sudo mount -o remount,mode=755 /sys/kernel/debug/tracing' test child finished with -1 ---- end ---- detect openat syscall event: FAILED! 
Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1445268229-1601-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/openat-syscall-all-cpus.c | 2 +- tools/perf/tests/openat-syscall-tp-fields.c | 2 +- tools/perf/tests/openat-syscall.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index 9e104a2e973d..2006485a2859 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -34,7 +34,7 @@ int test__openat_syscall_event_on_all_cpus(void) evsel = perf_evsel__newtp("syscalls", "sys_enter_openat"); if (IS_ERR(evsel)) { tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "syscalls", "sys_enter_openat"); - pr_err("%s\n", errbuf); + pr_debug("%s\n", errbuf); goto out_thread_map_delete; } diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index 473d3869727e..5e811cd8f1c3 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -89,7 +89,7 @@ int test__syscall_openat_tp_fields(void) err = perf_evsel__parse_sample(evsel, event, &sample); if (err) { - pr_err("Can't parse sample, err = %d\n", err); + pr_debug("Can't parse sample, err = %d\n", err); goto out_delete_evlist; } diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c index 7b1db8306098..033b54797b8a 100644 --- a/tools/perf/tests/openat-syscall.c +++ b/tools/perf/tests/openat-syscall.c @@ -22,7 +22,7 @@ int test__openat_syscall_event(void) evsel = perf_evsel__newtp("syscalls", "sys_enter_openat"); if (IS_ERR(evsel)) { tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "syscalls", "sys_enter_openat"); - pr_err("%s\n", errbuf); + pr_debug("%s\n", errbuf); 
goto out_thread_map_delete; } -- cgit v1.2.3 From 2690c730935873065175de33f59cce5bb221b9dc Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 20 Oct 2015 00:23:49 +0900 Subject: perf test: Suppress libtraceevent warnings Currently libtraceevent emits warnings on unsupported event formats. However it'd be better to see them only when the -v option is given. To do that, it needs to override the warning() function which is used in libtraceevent. Thus add set_warning_routine() same as set_die_routine() and check the verbose flag in our warning routine. Before: # perf test 5 5: parse events tests : Warning: [kvmmmu:kvm_mmu_get_page] bad op token { Warning: [kvmmmu:kvm_mmu_sync_page] bad op token { Warning: [kvmmmu:kvm_mmu_unsync_page] bad op token { Warning: [kvmmmu:kvm_mmu_prepare_zap_page] bad op token { Warning: [kvmmmu:fast_page_fault] function is_writable_pte not defined ... Ok After: # perf test 5 5: parse events tests : Ok Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1445268229-1601-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/parse-events.c | 14 ++++++++++++++ tools/perf/util/usage.c | 5 +++++ tools/perf/util/util.h | 1 + 3 files changed, 20 insertions(+) diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 0648b84a9171..636d7b42d844 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -5,6 +5,7 @@ #include #include "tests.h" #include "debug.h" +#include "util.h" #include #include @@ -1753,6 +1754,17 @@ static int test_pmu_events(void) return ret; } +static void debug_warn(const char *warn, va_list params) +{ + char msg[1024]; + + if (!verbose) + return; + + vsnprintf(msg, sizeof(msg), warn, params); + fprintf(stderr, " Warning: %s\n", msg); +} + int test__parse_events(void) { int ret1, ret2 = 0; @@ -1764,6 +1776,8 @@ do { \ ret2 = 
ret1; \ } while (0) + set_warning_routine(debug_warn); + TEST_EVENTS(test__events); if (test_pmu()) diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c index 4007aca8e0ca..6adfa18cdd4e 100644 --- a/tools/perf/util/usage.c +++ b/tools/perf/util/usage.c @@ -50,6 +50,11 @@ void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN) die_routine = routine; } +void set_warning_routine(void (*routine)(const char *err, va_list params)) +{ + warn_routine = routine; +} + void usage(const char *err) { usage_routine(err); diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 3d5b01e8978f..4cfb913aa9e0 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -145,6 +145,7 @@ extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2))) extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN); +extern void set_warning_routine(void (*routine)(const char *err, va_list params)); extern int prefixcmp(const char *str, const char *prefix); extern void set_buildid_dir(const char *dir); -- cgit v1.2.3 From 13839ec495a31844d66d487f740c07771c60a0d0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 19 Oct 2015 10:04:17 +0200 Subject: perf bench: Improve the 'perf bench mem memcpy' code readability - improve the readability of initializations - fix unnecessary double negations - fix ugly line breaks - fix other small details Signed-off-by: Ingo Molnar Cc: Andrew Morton Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1445241870-24854-2-git-send-email-mingo@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/mem-memcpy.c | 101 +++++++++++++++++++----------------------- 1 file changed, 45 insertions(+), 56 deletions(-) diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index d3dfb7936dcd..27606ff5c4f9 100644 --- a/tools/perf/bench/mem-memcpy.c +++ 
b/tools/perf/bench/mem-memcpy.c @@ -1,7 +1,7 @@ /* * mem-memcpy.c * - * memcpy: Simple memory copy in various ways + * Simple memcpy() and memset() benchmarks * * Written by Hitoshi Mitake */ @@ -61,20 +61,17 @@ struct routine { }; struct routine memcpy_routines[] = { - { .name = "default", - .desc = "Default memcpy() provided by glibc", - .fn.memcpy = memcpy }, -#ifdef HAVE_ARCH_X86_64_SUPPORT - -#define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn}, -#include "mem-memcpy-x86-64-asm-def.h" -#undef MEMCPY_FN + { .name = "default", + .desc = "Default memcpy() provided by glibc", + .fn.memcpy = memcpy }, +#ifdef HAVE_ARCH_X86_64_SUPPORT +# define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn}, +# include "mem-memcpy-x86-64-asm-def.h" +# undef MEMCPY_FN #endif - { NULL, - NULL, - {NULL} } + { NULL, } }; static const char * const bench_mem_memcpy_usage[] = { @@ -89,8 +86,7 @@ static struct perf_event_attr cycle_attr = { static void init_cycle(void) { - cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, - perf_event_open_cloexec_flag()); + cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag()); if (cycle_fd < 0 && errno == ENOSYS) die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); @@ -111,12 +107,9 @@ static u64 get_cycle(void) static double timeval2double(struct timeval *ts) { - return (double)ts->tv_sec + - (double)ts->tv_usec / (double)1000000; + return (double)ts->tv_sec + (double)ts->tv_usec / (double)1000000; } -#define pf (no_prefault ? 0 : 1) - #define print_bps(x) do { \ if (x < K) \ printf(" %14lf B/Sec", x); \ @@ -140,6 +133,7 @@ static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t l const struct routine *r = &info->routines[r_idx]; double result_bps[2]; u64 result_cycle[2]; + int prefault = no_prefault ? 
0 : 1; result_cycle[0] = result_cycle[1] = 0ULL; result_bps[0] = result_bps[1] = 0.0; @@ -149,8 +143,8 @@ static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t l if (bench_format == BENCH_FORMAT_DEFAULT) printf("# Copying %s Bytes ...\n\n", length_str); - if (!only_prefault && !no_prefault) { - /* show both of results */ + if (!only_prefault && prefault) { + /* Show both results: */ if (use_cycle) { result_cycle[0] = info->do_cycle(r, len, false); result_cycle[1] = info->do_cycle(r, len, true); @@ -160,14 +154,14 @@ static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t l } } else { if (use_cycle) - result_cycle[pf] = info->do_cycle(r, len, only_prefault); + result_cycle[prefault] = info->do_cycle(r, len, only_prefault); else - result_bps[pf] = info->do_gettimeofday(r, len, only_prefault); + result_bps[prefault] = info->do_gettimeofday(r, len, only_prefault); } switch (bench_format) { case BENCH_FORMAT_DEFAULT: - if (!only_prefault && !no_prefault) { + if (!only_prefault && prefault) { if (use_cycle) { printf(" %14lf Cycle/Byte\n", (double)result_cycle[0] @@ -184,16 +178,16 @@ static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t l } else { if (use_cycle) { printf(" %14lf Cycle/Byte", - (double)result_cycle[pf] + (double)result_cycle[prefault] / totallen); } else - print_bps(result_bps[pf]); + print_bps(result_bps[prefault]); printf("%s\n", only_prefault ? 
" (with prefault)" : ""); } break; case BENCH_FORMAT_SIMPLE: - if (!only_prefault && !no_prefault) { + if (!only_prefault && prefault) { if (use_cycle) { printf("%lf %lf\n", (double)result_cycle[0] / totallen, @@ -204,14 +198,14 @@ static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t l } } else { if (use_cycle) { - printf("%lf\n", (double)result_cycle[pf] + printf("%lf\n", (double)result_cycle[prefault] / totallen); } else - printf("%lf\n", result_bps[pf]); + printf("%lf\n", result_bps[prefault]); } break; default: - /* reaching this means there's some disaster: */ + /* Reaching this means there's some disaster: */ die("unknown format: %d\n", bench_format); break; } @@ -225,8 +219,7 @@ static int bench_mem_common(int argc, const char **argv, size_t len; double totallen; - argc = parse_options(argc, argv, options, - info->usage, 0); + argc = parse_options(argc, argv, options, info->usage, 0); if (no_prefault && only_prefault) { fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n"); @@ -244,7 +237,7 @@ static int bench_mem_common(int argc, const char **argv, return 1; } - /* same to without specifying either of prefault and no-prefault */ + /* Same as without specifying either of prefault and no-prefault: */ if (only_prefault && no_prefault) only_prefault = no_prefault = false; @@ -282,7 +275,8 @@ static void memcpy_alloc_mem(void **dst, void **src, size_t length) *src = zalloc(length); if (!*src) die("memory allocation failed - maybe length is too large?\n"); - /* Make sure to always replace the zero pages even if MMAP_THRESH is crossed */ + + /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */ memset(*src, 0, length); } @@ -308,8 +302,7 @@ static u64 do_memcpy_cycle(const struct routine *r, size_t len, bool prefault) return cycle_end - cycle_start; } -static double do_memcpy_gettimeofday(const struct routine *r, size_t len, - bool prefault) +static double do_memcpy_gettimeofday(const struct 
routine *r, size_t len, bool prefault) { struct timeval tv_start, tv_end, tv_diff; memcpy_t fn = r->fn.memcpy; @@ -337,10 +330,10 @@ int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unused) { struct bench_mem_info info = { - .routines = memcpy_routines, - .do_cycle = do_memcpy_cycle, - .do_gettimeofday = do_memcpy_gettimeofday, - .usage = bench_mem_memcpy_usage, + .routines = memcpy_routines, + .do_cycle = do_memcpy_cycle, + .do_gettimeofday = do_memcpy_gettimeofday, + .usage = bench_mem_memcpy_usage, }; return bench_mem_common(argc, argv, prefix, &info); @@ -404,30 +397,26 @@ static const char * const bench_mem_memset_usage[] = { }; static const struct routine memset_routines[] = { - { .name ="default", - .desc = "Default memset() provided by glibc", - .fn.memset = memset }, -#ifdef HAVE_ARCH_X86_64_SUPPORT - -#define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn }, -#include "mem-memset-x86-64-asm-def.h" -#undef MEMSET_FN + { .name = "default", + .desc = "Default memset() provided by glibc", + .fn.memset = memset }, +#ifdef HAVE_ARCH_X86_64_SUPPORT +# define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn }, +# include "mem-memset-x86-64-asm-def.h" +# undef MEMSET_FN #endif - { .name = NULL, - .desc = NULL, - .fn.memset = NULL } + { NULL, } }; -int bench_mem_memset(int argc, const char **argv, - const char *prefix __maybe_unused) +int bench_mem_memset(int argc, const char **argv, const char *prefix __maybe_unused) { struct bench_mem_info info = { - .routines = memset_routines, - .do_cycle = do_memset_cycle, - .do_gettimeofday = do_memset_gettimeofday, - .usage = bench_mem_memset_usage, + .routines = memset_routines, + .do_cycle = do_memset_cycle, + .do_gettimeofday = do_memset_gettimeofday, + .usage = bench_mem_memset_usage, }; return bench_mem_common(argc, argv, prefix, &info); -- cgit v1.2.3 From 276197415685e2a91ce367562800cf0f8fbe482c Mon Sep 17 00:00:00 2001 From: Ingo 
Molnar Date: Mon, 19 Oct 2015 10:04:18 +0200 Subject: perf bench: Default to all routines in 'perf bench mem' Few people know that the --routine option to 'perf bench memcpy/memset' exists, or that it's capable of testing the kernel's memcpy/memset implementations. Furthermore, 'perf bench mem all' will not run all routines: vega:~> perf bench mem all # Running mem/memcpy benchmark... Routine default (Default memcpy() provided by glibc) # Copying 1MB Bytes ... 894.454383 MB/Sec 3.844734 GB/Sec (with prefault) # Running mem/memset benchmark... Routine default (Default memset() provided by glibc) # Copying 1MB Bytes ... 1.220703 GB/Sec 9.042245 GB/Sec (with prefault) This is because, misleadingly, the 'all' refers to 'all sub-benchmarks', not 'all sub-benchmarks and routines'. Fix all this by making the memcpy/memset routine default to 'all', which results in all the benchmarks being run: triton:~> perf bench mem all # Running mem/memcpy benchmark... Routine default (Default memcpy() provided by glibc) # Copying 1MB Bytes ... 1.448906 GB/Sec 4.957170 GB/Sec (with prefault) Routine x86-64-unrolled (unrolled memcpy() in arch/x86/lib/memcpy_64.S) # Copying 1MB Bytes ... 1.614153 GB/Sec 4.379204 GB/Sec (with prefault) Routine x86-64-movsq (movsq-based memcpy() in arch/x86/lib/memcpy_64.S) # Copying 1MB Bytes ... 1.570036 GB/Sec 4.264465 GB/Sec (with prefault) Routine x86-64-movsb (movsb-based memcpy() in arch/x86/lib/memcpy_64.S) # Copying 1MB Bytes ... 1.788576 GB/Sec 6.554111 GB/Sec (with prefault) # Running mem/memset benchmark... Routine default (Default memset() provided by glibc) # Copying 1MB Bytes ... 2.082223 GB/Sec 9.126752 GB/Sec (with prefault) Routine x86-64-unrolled (unrolled memset() in arch/x86/lib/memset_64.S) # Copying 1MB Bytes ... 5.710892 GB/Sec 8.346688 GB/Sec (with prefault) Routine x86-64-stosq (movsq-based memset() in arch/x86/lib/memset_64.S) # Copying 1MB Bytes ... 
9.765625 GB/Sec 12.520032 GB/Sec (with prefault) Routine x86-64-stosb (movsb-based memset() in arch/x86/lib/memset_64.S) # Copying 1MB Bytes ... 9.668936 GB/Sec 12.682630 GB/Sec (with prefault) Signed-off-by: Ingo Molnar Cc: David Ahern Cc: Hitoshi Mitake Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1445241870-24854-3-git-send-email-mingo@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/mem-memcpy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index 27606ff5c4f9..263f84171ae5 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c @@ -24,7 +24,7 @@ #define K 1024 static const char *length_str = "1MB"; -static const char *routine = "default"; +static const char *routine = "all"; static int iterations = 1; static bool use_cycle; static int cycle_fd; @@ -35,7 +35,7 @@ static const struct option options[] = { OPT_STRING('l', "length", &length_str, "1MB", "Specify length of memory to copy. 
" "Available units: B, KB, MB, GB and TB (upper and lower)"), - OPT_STRING('r', "routine", &routine, "default", + OPT_STRING('r', "routine", &routine, "all", "Specify routine to copy, \"all\" runs all available routines"), OPT_INTEGER('i', "iterations", &iterations, "repeat memcpy() invocation this number of times"), -- cgit v1.2.3 From 2946f59ac31d703738c00c684613d289e8d001ea Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 19 Oct 2015 10:04:19 +0200 Subject: perf bench: Eliminate unused argument from bench_mem_common() Signed-off-by: Ingo Molnar Cc: David Ahern Cc: Hitoshi Mitake Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1445241870-24854-4-git-send-email-mingo@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/mem-memcpy.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index 263f84171ae5..7acb9b83382c 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c @@ -211,9 +211,7 @@ static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t l } } -static int bench_mem_common(int argc, const char **argv, - const char *prefix __maybe_unused, - struct bench_mem_info *info) +static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info) { int i; size_t len; @@ -326,8 +324,7 @@ static double do_memcpy_gettimeofday(const struct routine *r, size_t len, bool p return (double)(((double)len * iterations) / timeval2double(&tv_diff)); } -int bench_mem_memcpy(int argc, const char **argv, - const char *prefix __maybe_unused) +int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unused) { struct bench_mem_info info = { .routines = memcpy_routines, @@ -336,7 +333,7 @@ int bench_mem_memcpy(int argc, const char **argv, .usage = bench_mem_memcpy_usage, }; - return bench_mem_common(argc, argv, prefix, 
&info); + return bench_mem_common(argc, argv, &info); } static void memset_alloc_mem(void **dst, size_t length) @@ -419,5 +416,5 @@ int bench_mem_memset(int argc, const char **argv, const char *prefix __maybe_unu .usage = bench_mem_memset_usage, }; - return bench_mem_common(argc, argv, prefix, &info); + return bench_mem_common(argc, argv, &info); } -- cgit v1.2.3 From 9b2fa7f3e7799a335fd839906ab4d45b7d595dc4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 19 Oct 2015 10:04:20 +0200 Subject: perf bench: Rename 'mem-memcpy.c' => 'mem-functions.c' So mem-memcpy.c started out as a simple memcpy() benchmark, then it grew memset() functionality and now I plan to add string copy benchmarks as well. This makes the file name a misnomer: rename it to the more generic mem-functions.c name. Signed-off-by: Ingo Molnar Cc: David Ahern Cc: Hitoshi Mitake Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1445241870-24854-5-git-send-email-mingo@kernel.org [ The "rename" was introducing __unused, wasn't removing the old file, and didn't update tools/perf/bench/Build, fix it ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/Build | 2 +- tools/perf/bench/mem-functions.c | 420 +++++++++++++++++++++++++++++++++++++++ tools/perf/bench/mem-memcpy.c | 420 --------------------------------------- 3 files changed, 421 insertions(+), 421 deletions(-) create mode 100644 tools/perf/bench/mem-functions.c delete mode 100644 tools/perf/bench/mem-memcpy.c diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build index 573e28896038..60bf11943047 100644 --- a/tools/perf/bench/Build +++ b/tools/perf/bench/Build @@ -1,6 +1,6 @@ perf-y += sched-messaging.o perf-y += sched-pipe.o -perf-y += mem-memcpy.o +perf-y += mem-functions.o perf-y += futex-hash.o perf-y += futex-wake.o perf-y += futex-wake-parallel.o diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c new file mode 100644 index 
000000000000..7acb9b83382c --- /dev/null +++ b/tools/perf/bench/mem-functions.c @@ -0,0 +1,420 @@ +/* + * mem-memcpy.c + * + * Simple memcpy() and memset() benchmarks + * + * Written by Hitoshi Mitake + */ + +#include "../perf.h" +#include "../util/util.h" +#include "../util/parse-options.h" +#include "../util/header.h" +#include "../util/cloexec.h" +#include "bench.h" +#include "mem-memcpy-arch.h" +#include "mem-memset-arch.h" + +#include +#include +#include +#include +#include + +#define K 1024 + +static const char *length_str = "1MB"; +static const char *routine = "all"; +static int iterations = 1; +static bool use_cycle; +static int cycle_fd; +static bool only_prefault; +static bool no_prefault; + +static const struct option options[] = { + OPT_STRING('l', "length", &length_str, "1MB", + "Specify length of memory to copy. " + "Available units: B, KB, MB, GB and TB (upper and lower)"), + OPT_STRING('r', "routine", &routine, "all", + "Specify routine to copy, \"all\" runs all available routines"), + OPT_INTEGER('i', "iterations", &iterations, + "repeat memcpy() invocation this number of times"), + OPT_BOOLEAN('c', "cycle", &use_cycle, + "Use cycles event instead of gettimeofday() for measuring"), + OPT_BOOLEAN('o', "only-prefault", &only_prefault, + "Show only the result with page faults before memcpy()"), + OPT_BOOLEAN('n', "no-prefault", &no_prefault, + "Show only the result without page faults before memcpy()"), + OPT_END() +}; + +typedef void *(*memcpy_t)(void *, const void *, size_t); +typedef void *(*memset_t)(void *, int, size_t); + +struct routine { + const char *name; + const char *desc; + union { + memcpy_t memcpy; + memset_t memset; + } fn; +}; + +struct routine memcpy_routines[] = { + { .name = "default", + .desc = "Default memcpy() provided by glibc", + .fn.memcpy = memcpy }, + +#ifdef HAVE_ARCH_X86_64_SUPPORT +# define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn}, +# include "mem-memcpy-x86-64-asm-def.h" +# undef 
MEMCPY_FN +#endif + + { NULL, } +}; + +static const char * const bench_mem_memcpy_usage[] = { + "perf bench mem memcpy ", + NULL +}; + +static struct perf_event_attr cycle_attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES +}; + +static void init_cycle(void) +{ + cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag()); + + if (cycle_fd < 0 && errno == ENOSYS) + die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); + else + BUG_ON(cycle_fd < 0); +} + +static u64 get_cycle(void) +{ + int ret; + u64 clk; + + ret = read(cycle_fd, &clk, sizeof(u64)); + BUG_ON(ret != sizeof(u64)); + + return clk; +} + +static double timeval2double(struct timeval *ts) +{ + return (double)ts->tv_sec + (double)ts->tv_usec / (double)1000000; +} + +#define print_bps(x) do { \ + if (x < K) \ + printf(" %14lf B/Sec", x); \ + else if (x < K * K) \ + printf(" %14lfd KB/Sec", x / K); \ + else if (x < K * K * K) \ + printf(" %14lf MB/Sec", x / K / K); \ + else \ + printf(" %14lf GB/Sec", x / K / K / K); \ + } while (0) + +struct bench_mem_info { + const struct routine *routines; + u64 (*do_cycle)(const struct routine *r, size_t len, bool prefault); + double (*do_gettimeofday)(const struct routine *r, size_t len, bool prefault); + const char *const *usage; +}; + +static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t len, double totallen) +{ + const struct routine *r = &info->routines[r_idx]; + double result_bps[2]; + u64 result_cycle[2]; + int prefault = no_prefault ? 
0 : 1; + + result_cycle[0] = result_cycle[1] = 0ULL; + result_bps[0] = result_bps[1] = 0.0; + + printf("Routine %s (%s)\n", r->name, r->desc); + + if (bench_format == BENCH_FORMAT_DEFAULT) + printf("# Copying %s Bytes ...\n\n", length_str); + + if (!only_prefault && prefault) { + /* Show both results: */ + if (use_cycle) { + result_cycle[0] = info->do_cycle(r, len, false); + result_cycle[1] = info->do_cycle(r, len, true); + } else { + result_bps[0] = info->do_gettimeofday(r, len, false); + result_bps[1] = info->do_gettimeofday(r, len, true); + } + } else { + if (use_cycle) + result_cycle[prefault] = info->do_cycle(r, len, only_prefault); + else + result_bps[prefault] = info->do_gettimeofday(r, len, only_prefault); + } + + switch (bench_format) { + case BENCH_FORMAT_DEFAULT: + if (!only_prefault && prefault) { + if (use_cycle) { + printf(" %14lf Cycle/Byte\n", + (double)result_cycle[0] + / totallen); + printf(" %14lf Cycle/Byte (with prefault)\n", + (double)result_cycle[1] + / totallen); + } else { + print_bps(result_bps[0]); + printf("\n"); + print_bps(result_bps[1]); + printf(" (with prefault)\n"); + } + } else { + if (use_cycle) { + printf(" %14lf Cycle/Byte", + (double)result_cycle[prefault] + / totallen); + } else + print_bps(result_bps[prefault]); + + printf("%s\n", only_prefault ? 
" (with prefault)" : ""); + } + break; + case BENCH_FORMAT_SIMPLE: + if (!only_prefault && prefault) { + if (use_cycle) { + printf("%lf %lf\n", + (double)result_cycle[0] / totallen, + (double)result_cycle[1] / totallen); + } else { + printf("%lf %lf\n", + result_bps[0], result_bps[1]); + } + } else { + if (use_cycle) { + printf("%lf\n", (double)result_cycle[prefault] + / totallen); + } else + printf("%lf\n", result_bps[prefault]); + } + break; + default: + /* Reaching this means there's some disaster: */ + die("unknown format: %d\n", bench_format); + break; + } +} + +static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info) +{ + int i; + size_t len; + double totallen; + + argc = parse_options(argc, argv, options, info->usage, 0); + + if (no_prefault && only_prefault) { + fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n"); + return 1; + } + + if (use_cycle) + init_cycle(); + + len = (size_t)perf_atoll((char *)length_str); + totallen = (double)len * iterations; + + if ((s64)len <= 0) { + fprintf(stderr, "Invalid length:%s\n", length_str); + return 1; + } + + /* Same as without specifying either of prefault and no-prefault: */ + if (only_prefault && no_prefault) + only_prefault = no_prefault = false; + + if (!strncmp(routine, "all", 3)) { + for (i = 0; info->routines[i].name; i++) + __bench_mem_routine(info, i, len, totallen); + return 0; + } + + for (i = 0; info->routines[i].name; i++) { + if (!strcmp(info->routines[i].name, routine)) + break; + } + if (!info->routines[i].name) { + printf("Unknown routine:%s\n", routine); + printf("Available routines...\n"); + for (i = 0; info->routines[i].name; i++) { + printf("\t%s ... 
%s\n", + info->routines[i].name, info->routines[i].desc); + } + return 1; + } + + __bench_mem_routine(info, i, len, totallen); + + return 0; +} + +static void memcpy_alloc_mem(void **dst, void **src, size_t length) +{ + *dst = zalloc(length); + if (!*dst) + die("memory allocation failed - maybe length is too large?\n"); + + *src = zalloc(length); + if (!*src) + die("memory allocation failed - maybe length is too large?\n"); + + /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */ + memset(*src, 0, length); +} + +static u64 do_memcpy_cycle(const struct routine *r, size_t len, bool prefault) +{ + u64 cycle_start = 0ULL, cycle_end = 0ULL; + void *src = NULL, *dst = NULL; + memcpy_t fn = r->fn.memcpy; + int i; + + memcpy_alloc_mem(&dst, &src, len); + + if (prefault) + fn(dst, src, len); + + cycle_start = get_cycle(); + for (i = 0; i < iterations; ++i) + fn(dst, src, len); + cycle_end = get_cycle(); + + free(src); + free(dst); + return cycle_end - cycle_start; +} + +static double do_memcpy_gettimeofday(const struct routine *r, size_t len, bool prefault) +{ + struct timeval tv_start, tv_end, tv_diff; + memcpy_t fn = r->fn.memcpy; + void *src = NULL, *dst = NULL; + int i; + + memcpy_alloc_mem(&dst, &src, len); + + if (prefault) + fn(dst, src, len); + + BUG_ON(gettimeofday(&tv_start, NULL)); + for (i = 0; i < iterations; ++i) + fn(dst, src, len); + BUG_ON(gettimeofday(&tv_end, NULL)); + + timersub(&tv_end, &tv_start, &tv_diff); + + free(src); + free(dst); + return (double)(((double)len * iterations) / timeval2double(&tv_diff)); +} + +int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unused) +{ + struct bench_mem_info info = { + .routines = memcpy_routines, + .do_cycle = do_memcpy_cycle, + .do_gettimeofday = do_memcpy_gettimeofday, + .usage = bench_mem_memcpy_usage, + }; + + return bench_mem_common(argc, argv, &info); +} + +static void memset_alloc_mem(void **dst, size_t length) +{ + *dst = zalloc(length); + if (!*dst) + 
die("memory allocation failed - maybe length is too large?\n"); +} + +static u64 do_memset_cycle(const struct routine *r, size_t len, bool prefault) +{ + u64 cycle_start = 0ULL, cycle_end = 0ULL; + memset_t fn = r->fn.memset; + void *dst = NULL; + int i; + + memset_alloc_mem(&dst, len); + + if (prefault) + fn(dst, -1, len); + + cycle_start = get_cycle(); + for (i = 0; i < iterations; ++i) + fn(dst, i, len); + cycle_end = get_cycle(); + + free(dst); + return cycle_end - cycle_start; +} + +static double do_memset_gettimeofday(const struct routine *r, size_t len, + bool prefault) +{ + struct timeval tv_start, tv_end, tv_diff; + memset_t fn = r->fn.memset; + void *dst = NULL; + int i; + + memset_alloc_mem(&dst, len); + + if (prefault) + fn(dst, -1, len); + + BUG_ON(gettimeofday(&tv_start, NULL)); + for (i = 0; i < iterations; ++i) + fn(dst, i, len); + BUG_ON(gettimeofday(&tv_end, NULL)); + + timersub(&tv_end, &tv_start, &tv_diff); + + free(dst); + return (double)(((double)len * iterations) / timeval2double(&tv_diff)); +} + +static const char * const bench_mem_memset_usage[] = { + "perf bench mem memset ", + NULL +}; + +static const struct routine memset_routines[] = { + { .name = "default", + .desc = "Default memset() provided by glibc", + .fn.memset = memset }, + +#ifdef HAVE_ARCH_X86_64_SUPPORT +# define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn }, +# include "mem-memset-x86-64-asm-def.h" +# undef MEMSET_FN +#endif + + { NULL, } +}; + +int bench_mem_memset(int argc, const char **argv, const char *prefix __maybe_unused) +{ + struct bench_mem_info info = { + .routines = memset_routines, + .do_cycle = do_memset_cycle, + .do_gettimeofday = do_memset_gettimeofday, + .usage = bench_mem_memset_usage, + }; + + return bench_mem_common(argc, argv, &info); +} diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c deleted file mode 100644 index 7acb9b83382c..000000000000 --- a/tools/perf/bench/mem-memcpy.c +++ /dev/null @@ 
-1,420 +0,0 @@ -/* - * mem-memcpy.c - * - * Simple memcpy() and memset() benchmarks - * - * Written by Hitoshi Mitake - */ - -#include "../perf.h" -#include "../util/util.h" -#include "../util/parse-options.h" -#include "../util/header.h" -#include "../util/cloexec.h" -#include "bench.h" -#include "mem-memcpy-arch.h" -#include "mem-memset-arch.h" - -#include -#include -#include -#include -#include - -#define K 1024 - -static const char *length_str = "1MB"; -static const char *routine = "all"; -static int iterations = 1; -static bool use_cycle; -static int cycle_fd; -static bool only_prefault; -static bool no_prefault; - -static const struct option options[] = { - OPT_STRING('l', "length", &length_str, "1MB", - "Specify length of memory to copy. " - "Available units: B, KB, MB, GB and TB (upper and lower)"), - OPT_STRING('r', "routine", &routine, "all", - "Specify routine to copy, \"all\" runs all available routines"), - OPT_INTEGER('i', "iterations", &iterations, - "repeat memcpy() invocation this number of times"), - OPT_BOOLEAN('c', "cycle", &use_cycle, - "Use cycles event instead of gettimeofday() for measuring"), - OPT_BOOLEAN('o', "only-prefault", &only_prefault, - "Show only the result with page faults before memcpy()"), - OPT_BOOLEAN('n', "no-prefault", &no_prefault, - "Show only the result without page faults before memcpy()"), - OPT_END() -}; - -typedef void *(*memcpy_t)(void *, const void *, size_t); -typedef void *(*memset_t)(void *, int, size_t); - -struct routine { - const char *name; - const char *desc; - union { - memcpy_t memcpy; - memset_t memset; - } fn; -}; - -struct routine memcpy_routines[] = { - { .name = "default", - .desc = "Default memcpy() provided by glibc", - .fn.memcpy = memcpy }, - -#ifdef HAVE_ARCH_X86_64_SUPPORT -# define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn}, -# include "mem-memcpy-x86-64-asm-def.h" -# undef MEMCPY_FN -#endif - - { NULL, } -}; - -static const char * const 
bench_mem_memcpy_usage[] = { - "perf bench mem memcpy ", - NULL -}; - -static struct perf_event_attr cycle_attr = { - .type = PERF_TYPE_HARDWARE, - .config = PERF_COUNT_HW_CPU_CYCLES -}; - -static void init_cycle(void) -{ - cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag()); - - if (cycle_fd < 0 && errno == ENOSYS) - die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); - else - BUG_ON(cycle_fd < 0); -} - -static u64 get_cycle(void) -{ - int ret; - u64 clk; - - ret = read(cycle_fd, &clk, sizeof(u64)); - BUG_ON(ret != sizeof(u64)); - - return clk; -} - -static double timeval2double(struct timeval *ts) -{ - return (double)ts->tv_sec + (double)ts->tv_usec / (double)1000000; -} - -#define print_bps(x) do { \ - if (x < K) \ - printf(" %14lf B/Sec", x); \ - else if (x < K * K) \ - printf(" %14lfd KB/Sec", x / K); \ - else if (x < K * K * K) \ - printf(" %14lf MB/Sec", x / K / K); \ - else \ - printf(" %14lf GB/Sec", x / K / K / K); \ - } while (0) - -struct bench_mem_info { - const struct routine *routines; - u64 (*do_cycle)(const struct routine *r, size_t len, bool prefault); - double (*do_gettimeofday)(const struct routine *r, size_t len, bool prefault); - const char *const *usage; -}; - -static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t len, double totallen) -{ - const struct routine *r = &info->routines[r_idx]; - double result_bps[2]; - u64 result_cycle[2]; - int prefault = no_prefault ? 
0 : 1; - - result_cycle[0] = result_cycle[1] = 0ULL; - result_bps[0] = result_bps[1] = 0.0; - - printf("Routine %s (%s)\n", r->name, r->desc); - - if (bench_format == BENCH_FORMAT_DEFAULT) - printf("# Copying %s Bytes ...\n\n", length_str); - - if (!only_prefault && prefault) { - /* Show both results: */ - if (use_cycle) { - result_cycle[0] = info->do_cycle(r, len, false); - result_cycle[1] = info->do_cycle(r, len, true); - } else { - result_bps[0] = info->do_gettimeofday(r, len, false); - result_bps[1] = info->do_gettimeofday(r, len, true); - } - } else { - if (use_cycle) - result_cycle[prefault] = info->do_cycle(r, len, only_prefault); - else - result_bps[prefault] = info->do_gettimeofday(r, len, only_prefault); - } - - switch (bench_format) { - case BENCH_FORMAT_DEFAULT: - if (!only_prefault && prefault) { - if (use_cycle) { - printf(" %14lf Cycle/Byte\n", - (double)result_cycle[0] - / totallen); - printf(" %14lf Cycle/Byte (with prefault)\n", - (double)result_cycle[1] - / totallen); - } else { - print_bps(result_bps[0]); - printf("\n"); - print_bps(result_bps[1]); - printf(" (with prefault)\n"); - } - } else { - if (use_cycle) { - printf(" %14lf Cycle/Byte", - (double)result_cycle[prefault] - / totallen); - } else - print_bps(result_bps[prefault]); - - printf("%s\n", only_prefault ? 
" (with prefault)" : ""); - } - break; - case BENCH_FORMAT_SIMPLE: - if (!only_prefault && prefault) { - if (use_cycle) { - printf("%lf %lf\n", - (double)result_cycle[0] / totallen, - (double)result_cycle[1] / totallen); - } else { - printf("%lf %lf\n", - result_bps[0], result_bps[1]); - } - } else { - if (use_cycle) { - printf("%lf\n", (double)result_cycle[prefault] - / totallen); - } else - printf("%lf\n", result_bps[prefault]); - } - break; - default: - /* Reaching this means there's some disaster: */ - die("unknown format: %d\n", bench_format); - break; - } -} - -static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info) -{ - int i; - size_t len; - double totallen; - - argc = parse_options(argc, argv, options, info->usage, 0); - - if (no_prefault && only_prefault) { - fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n"); - return 1; - } - - if (use_cycle) - init_cycle(); - - len = (size_t)perf_atoll((char *)length_str); - totallen = (double)len * iterations; - - if ((s64)len <= 0) { - fprintf(stderr, "Invalid length:%s\n", length_str); - return 1; - } - - /* Same as without specifying either of prefault and no-prefault: */ - if (only_prefault && no_prefault) - only_prefault = no_prefault = false; - - if (!strncmp(routine, "all", 3)) { - for (i = 0; info->routines[i].name; i++) - __bench_mem_routine(info, i, len, totallen); - return 0; - } - - for (i = 0; info->routines[i].name; i++) { - if (!strcmp(info->routines[i].name, routine)) - break; - } - if (!info->routines[i].name) { - printf("Unknown routine:%s\n", routine); - printf("Available routines...\n"); - for (i = 0; info->routines[i].name; i++) { - printf("\t%s ... 
%s\n", - info->routines[i].name, info->routines[i].desc); - } - return 1; - } - - __bench_mem_routine(info, i, len, totallen); - - return 0; -} - -static void memcpy_alloc_mem(void **dst, void **src, size_t length) -{ - *dst = zalloc(length); - if (!*dst) - die("memory allocation failed - maybe length is too large?\n"); - - *src = zalloc(length); - if (!*src) - die("memory allocation failed - maybe length is too large?\n"); - - /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */ - memset(*src, 0, length); -} - -static u64 do_memcpy_cycle(const struct routine *r, size_t len, bool prefault) -{ - u64 cycle_start = 0ULL, cycle_end = 0ULL; - void *src = NULL, *dst = NULL; - memcpy_t fn = r->fn.memcpy; - int i; - - memcpy_alloc_mem(&dst, &src, len); - - if (prefault) - fn(dst, src, len); - - cycle_start = get_cycle(); - for (i = 0; i < iterations; ++i) - fn(dst, src, len); - cycle_end = get_cycle(); - - free(src); - free(dst); - return cycle_end - cycle_start; -} - -static double do_memcpy_gettimeofday(const struct routine *r, size_t len, bool prefault) -{ - struct timeval tv_start, tv_end, tv_diff; - memcpy_t fn = r->fn.memcpy; - void *src = NULL, *dst = NULL; - int i; - - memcpy_alloc_mem(&dst, &src, len); - - if (prefault) - fn(dst, src, len); - - BUG_ON(gettimeofday(&tv_start, NULL)); - for (i = 0; i < iterations; ++i) - fn(dst, src, len); - BUG_ON(gettimeofday(&tv_end, NULL)); - - timersub(&tv_end, &tv_start, &tv_diff); - - free(src); - free(dst); - return (double)(((double)len * iterations) / timeval2double(&tv_diff)); -} - -int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unused) -{ - struct bench_mem_info info = { - .routines = memcpy_routines, - .do_cycle = do_memcpy_cycle, - .do_gettimeofday = do_memcpy_gettimeofday, - .usage = bench_mem_memcpy_usage, - }; - - return bench_mem_common(argc, argv, &info); -} - -static void memset_alloc_mem(void **dst, size_t length) -{ - *dst = zalloc(length); - if (!*dst) - 
die("memory allocation failed - maybe length is too large?\n"); -} - -static u64 do_memset_cycle(const struct routine *r, size_t len, bool prefault) -{ - u64 cycle_start = 0ULL, cycle_end = 0ULL; - memset_t fn = r->fn.memset; - void *dst = NULL; - int i; - - memset_alloc_mem(&dst, len); - - if (prefault) - fn(dst, -1, len); - - cycle_start = get_cycle(); - for (i = 0; i < iterations; ++i) - fn(dst, i, len); - cycle_end = get_cycle(); - - free(dst); - return cycle_end - cycle_start; -} - -static double do_memset_gettimeofday(const struct routine *r, size_t len, - bool prefault) -{ - struct timeval tv_start, tv_end, tv_diff; - memset_t fn = r->fn.memset; - void *dst = NULL; - int i; - - memset_alloc_mem(&dst, len); - - if (prefault) - fn(dst, -1, len); - - BUG_ON(gettimeofday(&tv_start, NULL)); - for (i = 0; i < iterations; ++i) - fn(dst, i, len); - BUG_ON(gettimeofday(&tv_end, NULL)); - - timersub(&tv_end, &tv_start, &tv_diff); - - free(dst); - return (double)(((double)len * iterations) / timeval2double(&tv_diff)); -} - -static const char * const bench_mem_memset_usage[] = { - "perf bench mem memset ", - NULL -}; - -static const struct routine memset_routines[] = { - { .name = "default", - .desc = "Default memset() provided by glibc", - .fn.memset = memset }, - -#ifdef HAVE_ARCH_X86_64_SUPPORT -# define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn }, -# include "mem-memset-x86-64-asm-def.h" -# undef MEMSET_FN -#endif - - { NULL, } -}; - -int bench_mem_memset(int argc, const char **argv, const char *prefix __maybe_unused) -{ - struct bench_mem_info info = { - .routines = memset_routines, - .do_cycle = do_memset_cycle, - .do_gettimeofday = do_memset_gettimeofday, - .usage = bench_mem_memset_usage, - }; - - return bench_mem_common(argc, argv, &info); -} -- cgit v1.2.3 From 6db175c7333e22ee818373cbea067e3eaa0236f7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 19 Oct 2015 10:04:21 +0200 Subject: perf bench: Remove the prefaulting 
complication from 'perf bench mem mem*' So 'perf bench mem memcpy/memset' has elaborate code to measure memcpy()/memset() performance both with freshly allocated buffers (which includes initial page fault overhead) and with preallocated buffers. But the thing is, the resulting bandwidth results are mostly meaningless, because page faults dominate so much of the cost. It might make sense to measure cache cold vs. cache hot performance, but the code does not do this. So remove this complication, and always prefault the ranges before using them. Signed-off-by: Ingo Molnar Cc: David Ahern Cc: Hitoshi Mitake Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1445241870-24854-6-git-send-email-mingo@kernel.org [ Remove --no-prefault, --only-prefault from docs, noticed by David Ahern ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-bench.txt | 16 ---- tools/perf/bench/mem-functions.c | 146 +++++++++++--------------------- 2 files changed, 50 insertions(+), 112 deletions(-) diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt index ab632d9fbd7d..9cb60abe03aa 100644 --- a/tools/perf/Documentation/perf-bench.txt +++ b/tools/perf/Documentation/perf-bench.txt @@ -157,14 +157,6 @@ Repeat memcpy invocation this number of times. --cycle:: Use perf's cpu-cycles event instead of gettimeofday syscall. --o:: ---only-prefault:: -Show only the result with page faults before memcpy. - --n:: ---no-prefault:: -Show only the result without page faults before memcpy. - *memset*:: Suite for evaluating performance of simple memory set in various ways. @@ -189,14 +181,6 @@ Repeat memset invocation this number of times. --cycle:: Use perf's cpu-cycles event instead of gettimeofday syscall. --o:: ---only-prefault:: -Show only the result with page faults before memset. - --n:: ---no-prefault:: -Show only the result without page faults before memset. 
- SUITES FOR 'numa' ~~~~~~~~~~~~~~~~~ *mem*:: diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c index 7acb9b83382c..9c18a4b976b6 100644 --- a/tools/perf/bench/mem-functions.c +++ b/tools/perf/bench/mem-functions.c @@ -28,8 +28,6 @@ static const char *routine = "all"; static int iterations = 1; static bool use_cycle; static int cycle_fd; -static bool only_prefault; -static bool no_prefault; static const struct option options[] = { OPT_STRING('l', "length", &length_str, "1MB", @@ -41,10 +39,6 @@ static const struct option options[] = { "repeat memcpy() invocation this number of times"), OPT_BOOLEAN('c', "cycle", &use_cycle, "Use cycles event instead of gettimeofday() for measuring"), - OPT_BOOLEAN('o', "only-prefault", &only_prefault, - "Show only the result with page faults before memcpy()"), - OPT_BOOLEAN('n', "no-prefault", &no_prefault, - "Show only the result without page faults before memcpy()"), OPT_END() }; @@ -110,103 +104,60 @@ static double timeval2double(struct timeval *ts) return (double)ts->tv_sec + (double)ts->tv_usec / (double)1000000; } -#define print_bps(x) do { \ - if (x < K) \ - printf(" %14lf B/Sec", x); \ - else if (x < K * K) \ - printf(" %14lfd KB/Sec", x / K); \ - else if (x < K * K * K) \ - printf(" %14lf MB/Sec", x / K / K); \ - else \ - printf(" %14lf GB/Sec", x / K / K / K); \ +#define print_bps(x) do { \ + if (x < K) \ + printf(" %14lf B/Sec\n", x); \ + else if (x < K * K) \ + printf(" %14lfd KB/Sec\n", x / K); \ + else if (x < K * K * K) \ + printf(" %14lf MB/Sec\n", x / K / K); \ + else \ + printf(" %14lf GB/Sec\n", x / K / K / K); \ } while (0) struct bench_mem_info { const struct routine *routines; - u64 (*do_cycle)(const struct routine *r, size_t len, bool prefault); - double (*do_gettimeofday)(const struct routine *r, size_t len, bool prefault); + u64 (*do_cycle)(const struct routine *r, size_t len); + double (*do_gettimeofday)(const struct routine *r, size_t len); const char *const *usage; }; static 
void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t len, double totallen) { const struct routine *r = &info->routines[r_idx]; - double result_bps[2]; - u64 result_cycle[2]; - int prefault = no_prefault ? 0 : 1; - - result_cycle[0] = result_cycle[1] = 0ULL; - result_bps[0] = result_bps[1] = 0.0; + double result_bps = 0.0; + u64 result_cycle = 0; printf("Routine %s (%s)\n", r->name, r->desc); if (bench_format == BENCH_FORMAT_DEFAULT) printf("# Copying %s Bytes ...\n\n", length_str); - if (!only_prefault && prefault) { - /* Show both results: */ - if (use_cycle) { - result_cycle[0] = info->do_cycle(r, len, false); - result_cycle[1] = info->do_cycle(r, len, true); - } else { - result_bps[0] = info->do_gettimeofday(r, len, false); - result_bps[1] = info->do_gettimeofday(r, len, true); - } + if (use_cycle) { + result_cycle = info->do_cycle(r, len); } else { - if (use_cycle) - result_cycle[prefault] = info->do_cycle(r, len, only_prefault); - else - result_bps[prefault] = info->do_gettimeofday(r, len, only_prefault); + result_bps = info->do_gettimeofday(r, len); } switch (bench_format) { case BENCH_FORMAT_DEFAULT: - if (!only_prefault && prefault) { - if (use_cycle) { - printf(" %14lf Cycle/Byte\n", - (double)result_cycle[0] - / totallen); - printf(" %14lf Cycle/Byte (with prefault)\n", - (double)result_cycle[1] - / totallen); - } else { - print_bps(result_bps[0]); - printf("\n"); - print_bps(result_bps[1]); - printf(" (with prefault)\n"); - } + if (use_cycle) { + printf(" %14lf Cycle/Byte\n", (double)result_cycle/totallen); } else { - if (use_cycle) { - printf(" %14lf Cycle/Byte", - (double)result_cycle[prefault] - / totallen); - } else - print_bps(result_bps[prefault]); - - printf("%s\n", only_prefault ? 
" (with prefault)" : ""); + print_bps(result_bps); } break; + case BENCH_FORMAT_SIMPLE: - if (!only_prefault && prefault) { - if (use_cycle) { - printf("%lf %lf\n", - (double)result_cycle[0] / totallen, - (double)result_cycle[1] / totallen); - } else { - printf("%lf %lf\n", - result_bps[0], result_bps[1]); - } + if (use_cycle) { + printf("%lf\n", (double)result_cycle/totallen); } else { - if (use_cycle) { - printf("%lf\n", (double)result_cycle[prefault] - / totallen); - } else - printf("%lf\n", result_bps[prefault]); + printf("%lf\n", result_bps); } break; + default: - /* Reaching this means there's some disaster: */ - die("unknown format: %d\n", bench_format); + BUG_ON(1); break; } } @@ -219,11 +170,6 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info * argc = parse_options(argc, argv, options, info->usage, 0); - if (no_prefault && only_prefault) { - fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n"); - return 1; - } - if (use_cycle) init_cycle(); @@ -235,10 +181,6 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info * return 1; } - /* Same as without specifying either of prefault and no-prefault: */ - if (only_prefault && no_prefault) - only_prefault = no_prefault = false; - if (!strncmp(routine, "all", 3)) { for (i = 0; info->routines[i].name; i++) __bench_mem_routine(info, i, len, totallen); @@ -278,7 +220,7 @@ static void memcpy_alloc_mem(void **dst, void **src, size_t length) memset(*src, 0, length); } -static u64 do_memcpy_cycle(const struct routine *r, size_t len, bool prefault) +static u64 do_memcpy_cycle(const struct routine *r, size_t len) { u64 cycle_start = 0ULL, cycle_end = 0ULL; void *src = NULL, *dst = NULL; @@ -287,8 +229,11 @@ static u64 do_memcpy_cycle(const struct routine *r, size_t len, bool prefault) memcpy_alloc_mem(&dst, &src, len); - if (prefault) - fn(dst, src, len); + /* + * We prefault the freshly allocated memory range here, + * to not measure page fault 
overhead: + */ + fn(dst, src, len); cycle_start = get_cycle(); for (i = 0; i < iterations; ++i) @@ -300,7 +245,7 @@ static u64 do_memcpy_cycle(const struct routine *r, size_t len, bool prefault) return cycle_end - cycle_start; } -static double do_memcpy_gettimeofday(const struct routine *r, size_t len, bool prefault) +static double do_memcpy_gettimeofday(const struct routine *r, size_t len) { struct timeval tv_start, tv_end, tv_diff; memcpy_t fn = r->fn.memcpy; @@ -309,8 +254,11 @@ static double do_memcpy_gettimeofday(const struct routine *r, size_t len, bool p memcpy_alloc_mem(&dst, &src, len); - if (prefault) - fn(dst, src, len); + /* + * We prefault the freshly allocated memory range here, + * to not measure page fault overhead: + */ + fn(dst, src, len); BUG_ON(gettimeofday(&tv_start, NULL)); for (i = 0; i < iterations; ++i) @@ -321,6 +269,7 @@ static double do_memcpy_gettimeofday(const struct routine *r, size_t len, bool p free(src); free(dst); + return (double)(((double)len * iterations) / timeval2double(&tv_diff)); } @@ -343,7 +292,7 @@ static void memset_alloc_mem(void **dst, size_t length) die("memory allocation failed - maybe length is too large?\n"); } -static u64 do_memset_cycle(const struct routine *r, size_t len, bool prefault) +static u64 do_memset_cycle(const struct routine *r, size_t len) { u64 cycle_start = 0ULL, cycle_end = 0ULL; memset_t fn = r->fn.memset; @@ -352,8 +301,11 @@ static u64 do_memset_cycle(const struct routine *r, size_t len, bool prefault) memset_alloc_mem(&dst, len); - if (prefault) - fn(dst, -1, len); + /* + * We prefault the freshly allocated memory range here, + * to not measure page fault overhead: + */ + fn(dst, -1, len); cycle_start = get_cycle(); for (i = 0; i < iterations; ++i) @@ -364,8 +316,7 @@ static u64 do_memset_cycle(const struct routine *r, size_t len, bool prefault) return cycle_end - cycle_start; } -static double do_memset_gettimeofday(const struct routine *r, size_t len, - bool prefault) +static double 
do_memset_gettimeofday(const struct routine *r, size_t len) { struct timeval tv_start, tv_end, tv_diff; memset_t fn = r->fn.memset; @@ -374,8 +325,11 @@ static double do_memset_gettimeofday(const struct routine *r, size_t len, memset_alloc_mem(&dst, len); - if (prefault) - fn(dst, -1, len); + /* + * We prefault the freshly allocated memory range here, + * to not measure page fault overhead: + */ + fn(dst, -1, len); BUG_ON(gettimeofday(&tv_start, NULL)); for (i = 0; i < iterations; ++i) -- cgit v1.2.3 From 7a46a8fd13bd60584687f417cd35935965f29ae2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 19 Oct 2015 10:04:22 +0200 Subject: perf bench: List output formatting options on 'perf bench -h' So 'perf bench -h' is not very helpful when printing the help line about the output formatting options: -f, --format <default> Specify format style There are two output format styles, 'default' and 'simple', so improve the help text to: -f, --format <default|simple> Specify the output formatting style Signed-off-by: Ingo Molnar Cc: David Ahern Cc: Hitoshi Mitake Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1445241870-24854-7-git-send-email-mingo@kernel.org [ Removed leftovers from the mem-functions.c rename ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-bench.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index f67934d46d40..1b585213ba5a 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -110,7 +110,7 @@ int bench_format = BENCH_FORMAT_DEFAULT; unsigned int bench_repeat = 10; /* default number of times to repeat the run */ static const struct option bench_options[] = { - OPT_STRING('f', "format", &bench_format_str, "default", "Specify format style"), + OPT_STRING('f', "format", &bench_format_str, "default|simple", "Specify the output formatting style"), OPT_UINTEGER('r', "repeat", &bench_repeat, "Specify 
amount of times to repeat the run"), OPT_END() }; -- cgit v1.2.3 From b14f2d357675bd7fb4e5a705ac7320a9638ecab5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 19 Oct 2015 10:04:23 +0200 Subject: perf bench mem: Change 'cycle' to 'cycles' So 'perf bench mem memset/memcpy' has a CPU cycles measurement method, but calls it 'cycle' (singular) throughout the code, which makes it harder to read. Rename all related functions, variables and options to a plural 'cycles' nomenclature. Signed-off-by: Ingo Molnar Cc: David Ahern Cc: Hitoshi Mitake Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1445241870-24854-8-git-send-email-mingo@kernel.org [ s/--cycle/--cycles/g in perf-bench man page ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-bench.txt | 4 +-- tools/perf/bench/mem-functions.c | 56 ++++++++++++++++----------------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt index 9cb60abe03aa..17135ef92d70 100644 --- a/tools/perf/Documentation/perf-bench.txt +++ b/tools/perf/Documentation/perf-bench.txt @@ -154,7 +154,7 @@ On x86-64, x86-64-unrolled, x86-64-movsq and x86-64-movsb are supported. Repeat memcpy invocation this number of times. -c:: ---cycle:: +--cycles:: Use perf's cpu-cycles event instead of gettimeofday syscall. *memset*:: @@ -178,7 +178,7 @@ On x86-64, x86-64-unrolled, x86-64-stosq and x86-64-stosb are supported. Repeat memset invocation this number of times. -c:: ---cycle:: +--cycles:: Use perf's cpu-cycles event instead of gettimeofday syscall. 
SUITES FOR 'numa' diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c index 9c18a4b976b6..6fe8667fbdf2 100644 --- a/tools/perf/bench/mem-functions.c +++ b/tools/perf/bench/mem-functions.c @@ -26,8 +26,8 @@ static const char *length_str = "1MB"; static const char *routine = "all"; static int iterations = 1; -static bool use_cycle; -static int cycle_fd; +static bool use_cycles; +static int cycles_fd; static const struct option options[] = { OPT_STRING('l', "length", &length_str, "1MB", @@ -37,8 +37,8 @@ static const struct option options[] = { "Specify routine to copy, \"all\" runs all available routines"), OPT_INTEGER('i', "iterations", &iterations, "repeat memcpy() invocation this number of times"), - OPT_BOOLEAN('c', "cycle", &use_cycle, - "Use cycles event instead of gettimeofday() for measuring"), + OPT_BOOLEAN('c', "cycles", &use_cycles, + "Use a cycles event instead of gettimeofday() to measure performance"), OPT_END() }; @@ -78,22 +78,22 @@ static struct perf_event_attr cycle_attr = { .config = PERF_COUNT_HW_CPU_CYCLES }; -static void init_cycle(void) +static void init_cycles(void) { - cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag()); + cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag()); - if (cycle_fd < 0 && errno == ENOSYS) + if (cycles_fd < 0 && errno == ENOSYS) die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); else - BUG_ON(cycle_fd < 0); + BUG_ON(cycles_fd < 0); } -static u64 get_cycle(void) +static u64 get_cycles(void) { int ret; u64 clk; - ret = read(cycle_fd, &clk, sizeof(u64)); + ret = read(cycles_fd, &clk, sizeof(u64)); BUG_ON(ret != sizeof(u64)); return clk; @@ -117,7 +117,7 @@ static double timeval2double(struct timeval *ts) struct bench_mem_info { const struct routine *routines; - u64 (*do_cycle)(const struct routine *r, size_t len); + u64 (*do_cycles)(const struct routine *r, size_t len); double 
(*do_gettimeofday)(const struct routine *r, size_t len); const char *const *usage; }; @@ -126,31 +126,31 @@ static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t l { const struct routine *r = &info->routines[r_idx]; double result_bps = 0.0; - u64 result_cycle = 0; + u64 result_cycles = 0; printf("Routine %s (%s)\n", r->name, r->desc); if (bench_format == BENCH_FORMAT_DEFAULT) printf("# Copying %s Bytes ...\n\n", length_str); - if (use_cycle) { - result_cycle = info->do_cycle(r, len); + if (use_cycles) { + result_cycles = info->do_cycles(r, len); } else { result_bps = info->do_gettimeofday(r, len); } switch (bench_format) { case BENCH_FORMAT_DEFAULT: - if (use_cycle) { - printf(" %14lf Cycle/Byte\n", (double)result_cycle/totallen); + if (use_cycles) { + printf(" %14lf cycles/Byte\n", (double)result_cycles/totallen); } else { print_bps(result_bps); } break; case BENCH_FORMAT_SIMPLE: - if (use_cycle) { - printf("%lf\n", (double)result_cycle/totallen); + if (use_cycles) { + printf("%lf\n", (double)result_cycles/totallen); } else { printf("%lf\n", result_bps); } @@ -170,8 +170,8 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info * argc = parse_options(argc, argv, options, info->usage, 0); - if (use_cycle) - init_cycle(); + if (use_cycles) + init_cycles(); len = (size_t)perf_atoll((char *)length_str); totallen = (double)len * iterations; @@ -220,7 +220,7 @@ static void memcpy_alloc_mem(void **dst, void **src, size_t length) memset(*src, 0, length); } -static u64 do_memcpy_cycle(const struct routine *r, size_t len) +static u64 do_memcpy_cycles(const struct routine *r, size_t len) { u64 cycle_start = 0ULL, cycle_end = 0ULL; void *src = NULL, *dst = NULL; @@ -235,10 +235,10 @@ static u64 do_memcpy_cycle(const struct routine *r, size_t len) */ fn(dst, src, len); - cycle_start = get_cycle(); + cycle_start = get_cycles(); for (i = 0; i < iterations; ++i) fn(dst, src, len); - cycle_end = get_cycle(); + cycle_end = 
get_cycles(); free(src); free(dst); @@ -277,7 +277,7 @@ int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unu { struct bench_mem_info info = { .routines = memcpy_routines, - .do_cycle = do_memcpy_cycle, + .do_cycles = do_memcpy_cycles, .do_gettimeofday = do_memcpy_gettimeofday, .usage = bench_mem_memcpy_usage, }; @@ -292,7 +292,7 @@ static void memset_alloc_mem(void **dst, size_t length) die("memory allocation failed - maybe length is too large?\n"); } -static u64 do_memset_cycle(const struct routine *r, size_t len) +static u64 do_memset_cycles(const struct routine *r, size_t len) { u64 cycle_start = 0ULL, cycle_end = 0ULL; memset_t fn = r->fn.memset; @@ -307,10 +307,10 @@ static u64 do_memset_cycle(const struct routine *r, size_t len) */ fn(dst, -1, len); - cycle_start = get_cycle(); + cycle_start = get_cycles(); for (i = 0; i < iterations; ++i) fn(dst, i, len); - cycle_end = get_cycle(); + cycle_end = get_cycles(); free(dst); return cycle_end - cycle_start; @@ -365,7 +365,7 @@ int bench_mem_memset(int argc, const char **argv, const char *prefix __maybe_unu { struct bench_mem_info info = { .routines = memset_routines, - .do_cycle = do_memset_cycle, + .do_cycles = do_memset_cycles, .do_gettimeofday = do_memset_gettimeofday, .usage = bench_mem_memset_usage, }; -- cgit v1.2.3 From e815e327604af880bdcf38cdd711dfa78627ab2a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 19 Oct 2015 10:04:24 +0200 Subject: perf bench mem: Rename 'routine' to 'routine_str' So bench/mem-functions.c has a 'routine' name for the routines parameter string, but a 'length_str' name for the length parameter string. We also have another entity named 'routine': 'struct routine'. This is inconsistent and confusing: rename 'routine' to 'routine_str'. Also fix typos in the --routine help text. 
Signed-off-by: Ingo Molnar Cc: David Ahern Cc: Hitoshi Mitake Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1445241870-24854-9-git-send-email-mingo@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/mem-functions.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c index 6fe8667fbdf2..a76e57f3ab66 100644 --- a/tools/perf/bench/mem-functions.c +++ b/tools/perf/bench/mem-functions.c @@ -24,7 +24,7 @@ #define K 1024 static const char *length_str = "1MB"; -static const char *routine = "all"; +static const char *routine_str = "all"; static int iterations = 1; static bool use_cycles; static int cycles_fd; @@ -33,8 +33,8 @@ static const struct option options[] = { OPT_STRING('l', "length", &length_str, "1MB", "Specify length of memory to copy. " "Available units: B, KB, MB, GB and TB (upper and lower)"), - OPT_STRING('r', "routine", &routine, "all", - "Specify routine to copy, \"all\" runs all available routines"), + OPT_STRING('r', "routine", &routine_str, "all", + "Specify the routine to run, \"all\" runs all available routines"), OPT_INTEGER('i', "iterations", &iterations, "repeat memcpy() invocation this number of times"), OPT_BOOLEAN('c', "cycles", &use_cycles, @@ -181,18 +181,18 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info * return 1; } - if (!strncmp(routine, "all", 3)) { + if (!strncmp(routine_str, "all", 3)) { for (i = 0; info->routines[i].name; i++) __bench_mem_routine(info, i, len, totallen); return 0; } for (i = 0; info->routines[i].name; i++) { - if (!strcmp(info->routines[i].name, routine)) + if (!strcmp(info->routines[i].name, routine_str)) break; } if (!info->routines[i].name) { - printf("Unknown routine:%s\n", routine); + printf("Unknown routine: %s\n", routine_str); printf("Available routines...\n"); for (i = 0; 
info->routines[i].name; i++) { printf("\t%s ... %s\n", -- cgit v1.2.3 From a69b4f741340a52d0976636a45c9976a883f03a0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 19 Oct 2015 10:04:25 +0200 Subject: perf bench mem: Fix 'length' vs. 'size' naming confusion So 'perf bench mem memcpy/memset' consistently uses 'len' and 'length' for buffer sizes - while it's really a memory buffer size. (strings have length.) Rename all affected variables. Signed-off-by: Ingo Molnar Cc: David Ahern Cc: Hitoshi Mitake Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1445241870-24854-10-git-send-email-mingo@kernel.org [ Update perf-bench man page ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-bench.txt | 8 +-- tools/perf/bench/mem-functions.c | 92 ++++++++++++++++----------------- 2 files changed, 50 insertions(+), 50 deletions(-) diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt index 17135ef92d70..bbd27d89b50a 100644 --- a/tools/perf/Documentation/perf-bench.txt +++ b/tools/perf/Documentation/perf-bench.txt @@ -139,8 +139,8 @@ Suite for evaluating performance of simple memory copy in various ways. Options of *memcpy* ^^^^^^^^^^^^^^^^^^^ -l:: ---length:: -Specify length of memory to copy (default: 1MB). +--size:: +Specify size of memory to copy (default: 1MB). Available units are B, KB, MB, GB and TB (case insensitive). -r:: @@ -163,8 +163,8 @@ Suite for evaluating performance of simple memory set in various ways. Options of *memset* ^^^^^^^^^^^^^^^^^^^ -l:: ---length:: -Specify length of memory to set (default: 1MB). +--size:: +Specify size of memory to set (default: 1MB). Available units are B, KB, MB, GB and TB (case insensitive). 
-r:: diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c index a76e57f3ab66..1605249d2912 100644 --- a/tools/perf/bench/mem-functions.c +++ b/tools/perf/bench/mem-functions.c @@ -23,15 +23,15 @@ #define K 1024 -static const char *length_str = "1MB"; +static const char *size_str = "1MB"; static const char *routine_str = "all"; static int iterations = 1; static bool use_cycles; static int cycles_fd; static const struct option options[] = { - OPT_STRING('l', "length", &length_str, "1MB", - "Specify length of memory to copy. " + OPT_STRING('l', "size", &size_str, "1MB", + "Specify the size of the memory buffers. " "Available units: B, KB, MB, GB and TB (upper and lower)"), OPT_STRING('r', "routine", &routine_str, "all", "Specify the routine to run, \"all\" runs all available routines"), @@ -117,12 +117,12 @@ static double timeval2double(struct timeval *ts) struct bench_mem_info { const struct routine *routines; - u64 (*do_cycles)(const struct routine *r, size_t len); - double (*do_gettimeofday)(const struct routine *r, size_t len); + u64 (*do_cycles)(const struct routine *r, size_t size); + double (*do_gettimeofday)(const struct routine *r, size_t size); const char *const *usage; }; -static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t len, double totallen) +static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t size, double size_total) { const struct routine *r = &info->routines[r_idx]; double result_bps = 0.0; @@ -131,18 +131,18 @@ static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t l printf("Routine %s (%s)\n", r->name, r->desc); if (bench_format == BENCH_FORMAT_DEFAULT) - printf("# Copying %s Bytes ...\n\n", length_str); + printf("# Copying %s Bytes ...\n\n", size_str); if (use_cycles) { - result_cycles = info->do_cycles(r, len); + result_cycles = info->do_cycles(r, size); } else { - result_bps = info->do_gettimeofday(r, len); + result_bps = 
info->do_gettimeofday(r, size); } switch (bench_format) { case BENCH_FORMAT_DEFAULT: if (use_cycles) { - printf(" %14lf cycles/Byte\n", (double)result_cycles/totallen); + printf(" %14lf cycles/Byte\n", (double)result_cycles/size_total); } else { print_bps(result_bps); } @@ -150,7 +150,7 @@ static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t l case BENCH_FORMAT_SIMPLE: if (use_cycles) { - printf("%lf\n", (double)result_cycles/totallen); + printf("%lf\n", (double)result_cycles/size_total); } else { printf("%lf\n", result_bps); } @@ -165,25 +165,25 @@ static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t l static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info) { int i; - size_t len; - double totallen; + size_t size; + double size_total; argc = parse_options(argc, argv, options, info->usage, 0); if (use_cycles) init_cycles(); - len = (size_t)perf_atoll((char *)length_str); - totallen = (double)len * iterations; + size = (size_t)perf_atoll((char *)size_str); + size_total = (double)size * iterations; - if ((s64)len <= 0) { - fprintf(stderr, "Invalid length:%s\n", length_str); + if ((s64)size <= 0) { + fprintf(stderr, "Invalid size:%s\n", size_str); return 1; } if (!strncmp(routine_str, "all", 3)) { for (i = 0; info->routines[i].name; i++) - __bench_mem_routine(info, i, len, totallen); + __bench_mem_routine(info, i, size, size_total); return 0; } @@ -201,43 +201,43 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info * return 1; } - __bench_mem_routine(info, i, len, totallen); + __bench_mem_routine(info, i, size, size_total); return 0; } -static void memcpy_alloc_mem(void **dst, void **src, size_t length) +static void memcpy_alloc_mem(void **dst, void **src, size_t size) { - *dst = zalloc(length); + *dst = zalloc(size); if (!*dst) - die("memory allocation failed - maybe length is too large?\n"); + die("memory allocation failed - maybe size is too large?\n"); - 
*src = zalloc(length); + *src = zalloc(size); if (!*src) - die("memory allocation failed - maybe length is too large?\n"); + die("memory allocation failed - maybe size is too large?\n"); /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */ - memset(*src, 0, length); + memset(*src, 0, size); } -static u64 do_memcpy_cycles(const struct routine *r, size_t len) +static u64 do_memcpy_cycles(const struct routine *r, size_t size) { u64 cycle_start = 0ULL, cycle_end = 0ULL; void *src = NULL, *dst = NULL; memcpy_t fn = r->fn.memcpy; int i; - memcpy_alloc_mem(&dst, &src, len); + memcpy_alloc_mem(&dst, &src, size); /* * We prefault the freshly allocated memory range here, * to not measure page fault overhead: */ - fn(dst, src, len); + fn(dst, src, size); cycle_start = get_cycles(); for (i = 0; i < iterations; ++i) - fn(dst, src, len); + fn(dst, src, size); cycle_end = get_cycles(); free(src); @@ -245,24 +245,24 @@ static u64 do_memcpy_cycles(const struct routine *r, size_t len) return cycle_end - cycle_start; } -static double do_memcpy_gettimeofday(const struct routine *r, size_t len) +static double do_memcpy_gettimeofday(const struct routine *r, size_t size) { struct timeval tv_start, tv_end, tv_diff; memcpy_t fn = r->fn.memcpy; void *src = NULL, *dst = NULL; int i; - memcpy_alloc_mem(&dst, &src, len); + memcpy_alloc_mem(&dst, &src, size); /* * We prefault the freshly allocated memory range here, * to not measure page fault overhead: */ - fn(dst, src, len); + fn(dst, src, size); BUG_ON(gettimeofday(&tv_start, NULL)); for (i = 0; i < iterations; ++i) - fn(dst, src, len); + fn(dst, src, size); BUG_ON(gettimeofday(&tv_end, NULL)); timersub(&tv_end, &tv_start, &tv_diff); @@ -270,7 +270,7 @@ static double do_memcpy_gettimeofday(const struct routine *r, size_t len) free(src); free(dst); - return (double)(((double)len * iterations) / timeval2double(&tv_diff)); + return (double)(((double)size * iterations) / timeval2double(&tv_diff)); } int 
bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unused) @@ -285,61 +285,61 @@ int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unu return bench_mem_common(argc, argv, &info); } -static void memset_alloc_mem(void **dst, size_t length) +static void memset_alloc_mem(void **dst, size_t size) { - *dst = zalloc(length); + *dst = zalloc(size); if (!*dst) - die("memory allocation failed - maybe length is too large?\n"); + die("memory allocation failed - maybe size is too large?\n"); } -static u64 do_memset_cycles(const struct routine *r, size_t len) +static u64 do_memset_cycles(const struct routine *r, size_t size) { u64 cycle_start = 0ULL, cycle_end = 0ULL; memset_t fn = r->fn.memset; void *dst = NULL; int i; - memset_alloc_mem(&dst, len); + memset_alloc_mem(&dst, size); /* * We prefault the freshly allocated memory range here, * to not measure page fault overhead: */ - fn(dst, -1, len); + fn(dst, -1, size); cycle_start = get_cycles(); for (i = 0; i < iterations; ++i) - fn(dst, i, len); + fn(dst, i, size); cycle_end = get_cycles(); free(dst); return cycle_end - cycle_start; } -static double do_memset_gettimeofday(const struct routine *r, size_t len) +static double do_memset_gettimeofday(const struct routine *r, size_t size) { struct timeval tv_start, tv_end, tv_diff; memset_t fn = r->fn.memset; void *dst = NULL; int i; - memset_alloc_mem(&dst, len); + memset_alloc_mem(&dst, size); /* * We prefault the freshly allocated memory range here, * to not measure page fault overhead: */ - fn(dst, -1, len); + fn(dst, -1, size); BUG_ON(gettimeofday(&tv_start, NULL)); for (i = 0; i < iterations; ++i) - fn(dst, i, len); + fn(dst, i, size); BUG_ON(gettimeofday(&tv_end, NULL)); timersub(&tv_end, &tv_start, &tv_diff); free(dst); - return (double)(((double)len * iterations) / timeval2double(&tv_diff)); + return (double)(((double)size * iterations) / timeval2double(&tv_diff)); } static const char * const bench_mem_memset_usage[] = { -- 
cgit v1.2.3 From 13b1fdce8d46027f346c0533a4323b58e2b5bad8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 19 Oct 2015 10:04:26 +0200 Subject: perf bench mem: Improve user visible strings - fix various typos in user visible output strings - make the output consistent (wrt. capitalization and spelling) - offer the list of routines to benchmark on '-r help'. Signed-off-by: Ingo Molnar Cc: David Ahern Cc: Hitoshi Mitake Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1445241870-24854-11-git-send-email-mingo@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/mem-functions.c | 29 +++++++++++++++++------------ tools/perf/builtin-bench.c | 6 +++--- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c index 1605249d2912..318da3421e69 100644 --- a/tools/perf/bench/mem-functions.c +++ b/tools/perf/bench/mem-functions.c @@ -32,13 +32,17 @@ static int cycles_fd; static const struct option options[] = { OPT_STRING('l', "size", &size_str, "1MB", "Specify the size of the memory buffers. 
" - "Available units: B, KB, MB, GB and TB (upper and lower)"), + "Available units: B, KB, MB, GB and TB (case insensitive)"), + OPT_STRING('r', "routine", &routine_str, "all", - "Specify the routine to run, \"all\" runs all available routines"), + "Specify the routine to run, \"all\" runs all available routines, \"help\" lists them"), + OPT_INTEGER('i', "iterations", &iterations, - "repeat memcpy() invocation this number of times"), + "Repeat the function this number of times"), + OPT_BOOLEAN('c', "cycles", &use_cycles, "Use a cycles event instead of gettimeofday() to measure performance"), + OPT_END() }; @@ -106,13 +110,13 @@ static double timeval2double(struct timeval *ts) #define print_bps(x) do { \ if (x < K) \ - printf(" %14lf B/Sec\n", x); \ + printf(" %14lf bytes/sec\n", x); \ else if (x < K * K) \ - printf(" %14lfd KB/Sec\n", x / K); \ + printf(" %14lfd KB/sec\n", x / K); \ else if (x < K * K * K) \ - printf(" %14lf MB/Sec\n", x / K / K); \ + printf(" %14lf MB/sec\n", x / K / K); \ else \ - printf(" %14lf GB/Sec\n", x / K / K / K); \ + printf(" %14lf GB/sec\n", x / K / K / K); \ } while (0) struct bench_mem_info { @@ -128,10 +132,10 @@ static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t s double result_bps = 0.0; u64 result_cycles = 0; - printf("Routine %s (%s)\n", r->name, r->desc); + printf("# Routine '%s' (%s)\n", r->name, r->desc); if (bench_format == BENCH_FORMAT_DEFAULT) - printf("# Copying %s Bytes ...\n\n", size_str); + printf("# Copying %s bytes ...\n\n", size_str); if (use_cycles) { result_cycles = info->do_cycles(r, size); @@ -142,7 +146,7 @@ static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t s switch (bench_format) { case BENCH_FORMAT_DEFAULT: if (use_cycles) { - printf(" %14lf cycles/Byte\n", (double)result_cycles/size_total); + printf(" %14lf cycles/byte\n", (double)result_cycles/size_total); } else { print_bps(result_bps); } @@ -192,8 +196,9 @@ static int bench_mem_common(int argc, const 
char **argv, struct bench_mem_info * break; } if (!info->routines[i].name) { - printf("Unknown routine: %s\n", routine_str); - printf("Available routines...\n"); + if (strcmp(routine_str, "help") && strcmp(routine_str, "h")) + printf("Unknown routine: %s\n", routine_str); + printf("Available routines:\n"); for (i = 0; info->routines[i].name; i++) { printf("\t%s ... %s\n", info->routines[i].name, info->routines[i].desc); diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index 1b585213ba5a..a8fc948c8ace 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -49,9 +49,9 @@ static struct bench sched_benchmarks[] = { }; static struct bench mem_benchmarks[] = { - { "memcpy", "Benchmark for memcpy()", bench_mem_memcpy }, - { "memset", "Benchmark for memset() tests", bench_mem_memset }, - { "all", "Test all memory benchmarks", NULL }, + { "memcpy", "Benchmark for memcpy() functions", bench_mem_memcpy }, + { "memset", "Benchmark for memset() functions", bench_mem_memset }, + { "all", "Test all memory access benchmarks", NULL }, { NULL, NULL, NULL } }; -- cgit v1.2.3 From 5dd93304a5d386c73c0a59117752acdca67f857d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 19 Oct 2015 10:04:27 +0200 Subject: perf bench mem: Reorganize the code a bit Reorder functions a bit, so that we synchronize the layout of the memcpy() and memset() portions of the code. This improves the code, especially after we'll add an strlcpy() variant as well. 
Signed-off-by: Ingo Molnar Cc: David Ahern Cc: Hitoshi Mitake Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1445241870-24854-12-git-send-email-mingo@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/mem-functions.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c index 318da3421e69..8d980d4a64e5 100644 --- a/tools/perf/bench/mem-functions.c +++ b/tools/perf/bench/mem-functions.c @@ -58,25 +58,6 @@ struct routine { } fn; }; -struct routine memcpy_routines[] = { - { .name = "default", - .desc = "Default memcpy() provided by glibc", - .fn.memcpy = memcpy }, - -#ifdef HAVE_ARCH_X86_64_SUPPORT -# define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn}, -# include "mem-memcpy-x86-64-asm-def.h" -# undef MEMCPY_FN -#endif - - { NULL, } -}; - -static const char * const bench_mem_memcpy_usage[] = { - "perf bench mem memcpy ", - NULL -}; - static struct perf_event_attr cycle_attr = { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES @@ -278,6 +259,25 @@ static double do_memcpy_gettimeofday(const struct routine *r, size_t size) return (double)(((double)size * iterations) / timeval2double(&tv_diff)); } +struct routine memcpy_routines[] = { + { .name = "default", + .desc = "Default memcpy() provided by glibc", + .fn.memcpy = memcpy }, + +#ifdef HAVE_ARCH_X86_64_SUPPORT +# define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn}, +# include "mem-memcpy-x86-64-asm-def.h" +# undef MEMCPY_FN +#endif + + { NULL, } +}; + +static const char * const bench_mem_memcpy_usage[] = { + "perf bench mem memcpy ", + NULL +}; + int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unused) { struct bench_mem_info info = { -- cgit v1.2.3 From b0d22e52e3d2c2b151dfaa0f6e01bafa5475344f Mon 
Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 19 Oct 2015 10:04:28 +0200 Subject: perf bench: Harmonize all the -l/--nr_loops options We have three benchmarking subsystems that specify some sort of 'number of loops' parameter - but all of them do it inconsistently: numa: -l/--nr_loops sched messaging: -l/--loops mem memset/memcpy: -i/--iterations Harmonize them to -l/--nr_loops by picking the numa variant - which is also the most likely one to have existing scripting which we don't want to break. Plus improve the parameter help texts to indicate the default value for the nr_loops variable to keep users from guessing ... Also propagate the naming to internal variables. Signed-off-by: Ingo Molnar Cc: David Ahern Cc: Hitoshi Mitake Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1445241870-24854-13-git-send-email-mingo@kernel.org [ Let the harmonisation reach the perf-bench man page as well ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-bench.txt | 10 +++++----- tools/perf/bench/mem-functions.c | 22 +++++++++++----------- tools/perf/bench/numa.c | 4 ++-- tools/perf/bench/sched-messaging.c | 10 +++++----- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt index bbd27d89b50a..ddfb3e10e88c 100644 --- a/tools/perf/Documentation/perf-bench.txt +++ b/tools/perf/Documentation/perf-bench.txt @@ -82,7 +82,7 @@ Be multi thread instead of multi process Specify number of groups -l:: ---loop=:: +--nr_loops=:: Specify number of loops Example of *messaging* @@ -149,8 +149,8 @@ Specify routine to copy (default: default). Available routines are depend on the architecture. On x86-64, x86-64-unrolled, x86-64-movsq and x86-64-movsb are supported. --i:: ---iterations:: +-l:: +--nr_loops:: Repeat memcpy invocation this number of times. 
-c:: @@ -173,8 +173,8 @@ Specify routine to set (default: default). Available routines are depend on the architecture. On x86-64, x86-64-unrolled, x86-64-stosq and x86-64-stosb are supported. --i:: ---iterations:: +-l:: +--nr_loops:: Repeat memset invocation this number of times. -c:: diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c index 8d980d4a64e5..c5dfabdbd8d0 100644 --- a/tools/perf/bench/mem-functions.c +++ b/tools/perf/bench/mem-functions.c @@ -25,20 +25,20 @@ static const char *size_str = "1MB"; static const char *routine_str = "all"; -static int iterations = 1; +static int nr_loops = 1; static bool use_cycles; static int cycles_fd; static const struct option options[] = { - OPT_STRING('l', "size", &size_str, "1MB", + OPT_STRING('s', "size", &size_str, "1MB", "Specify the size of the memory buffers. " "Available units: B, KB, MB, GB and TB (case insensitive)"), OPT_STRING('r', "routine", &routine_str, "all", "Specify the routine to run, \"all\" runs all available routines, \"help\" lists them"), - OPT_INTEGER('i', "iterations", &iterations, - "Repeat the function this number of times"), + OPT_INTEGER('l', "nr_loops", &nr_loops, + "Specify the number of loops to run. 
(default: 1)"), OPT_BOOLEAN('c', "cycles", &use_cycles, "Use a cycles event instead of gettimeofday() to measure performance"), @@ -159,7 +159,7 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info * init_cycles(); size = (size_t)perf_atoll((char *)size_str); - size_total = (double)size * iterations; + size_total = (double)size * nr_loops; if ((s64)size <= 0) { fprintf(stderr, "Invalid size:%s\n", size_str); @@ -222,7 +222,7 @@ static u64 do_memcpy_cycles(const struct routine *r, size_t size) fn(dst, src, size); cycle_start = get_cycles(); - for (i = 0; i < iterations; ++i) + for (i = 0; i < nr_loops; ++i) fn(dst, src, size); cycle_end = get_cycles(); @@ -247,7 +247,7 @@ static double do_memcpy_gettimeofday(const struct routine *r, size_t size) fn(dst, src, size); BUG_ON(gettimeofday(&tv_start, NULL)); - for (i = 0; i < iterations; ++i) + for (i = 0; i < nr_loops; ++i) fn(dst, src, size); BUG_ON(gettimeofday(&tv_end, NULL)); @@ -256,7 +256,7 @@ static double do_memcpy_gettimeofday(const struct routine *r, size_t size) free(src); free(dst); - return (double)(((double)size * iterations) / timeval2double(&tv_diff)); + return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); } struct routine memcpy_routines[] = { @@ -313,7 +313,7 @@ static u64 do_memset_cycles(const struct routine *r, size_t size) fn(dst, -1, size); cycle_start = get_cycles(); - for (i = 0; i < iterations; ++i) + for (i = 0; i < nr_loops; ++i) fn(dst, i, size); cycle_end = get_cycles(); @@ -337,14 +337,14 @@ static double do_memset_gettimeofday(const struct routine *r, size_t size) fn(dst, -1, size); BUG_ON(gettimeofday(&tv_start, NULL)); - for (i = 0; i < iterations; ++i) + for (i = 0; i < nr_loops; ++i) fn(dst, i, size); BUG_ON(gettimeofday(&tv_end, NULL)); timersub(&tv_end, &tv_start, &tv_diff); free(dst); - return (double)(((double)size * iterations) / timeval2double(&tv_diff)); + return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); } 
static const char * const bench_mem_memset_usage[] = { diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 870b7e665a20..492df2752a2d 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -164,8 +164,8 @@ static const struct option options[] = { OPT_STRING('L', "mb_proc_locked", &p0.mb_proc_locked_str,"MB", "process serialized/locked memory access (MBs), <= process_memory"), OPT_STRING('T', "mb_thread" , &p0.mb_thread_str, "MB", "thread memory (MBs)"), - OPT_UINTEGER('l', "nr_loops" , &p0.nr_loops, "max number of loops to run"), - OPT_UINTEGER('s', "nr_secs" , &p0.nr_secs, "max number of seconds to run"), + OPT_UINTEGER('l', "nr_loops" , &p0.nr_loops, "max number of loops to run (default: unlimited)"), + OPT_UINTEGER('s', "nr_secs" , &p0.nr_secs, "max number of seconds to run (default: 5 secs)"), OPT_UINTEGER('u', "usleep" , &p0.sleep_usecs, "usecs to sleep per loop iteration"), OPT_BOOLEAN('R', "data_reads" , &p0.data_reads, "access the data via writes (can be mixed with -W)"), diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c index d7f281c2828d..d4ff1b539cfd 100644 --- a/tools/perf/bench/sched-messaging.c +++ b/tools/perf/bench/sched-messaging.c @@ -33,7 +33,7 @@ #define DATASIZE 100 static bool use_pipes = false; -static unsigned int loops = 100; +static unsigned int nr_loops = 100; static bool thread_mode = false; static unsigned int num_groups = 10; @@ -79,7 +79,7 @@ static void ready(int ready_out, int wakefd) err(EXIT_FAILURE, "poll"); } -/* Sender sprays loops messages down each file descriptor */ +/* Sender sprays nr_loops messages down each file descriptor */ static void *sender(struct sender_context *ctx) { char data[DATASIZE]; @@ -88,7 +88,7 @@ static void *sender(struct sender_context *ctx) ready(ctx->ready_out, ctx->wakefd); /* Now pump to every receiver. 
*/ - for (i = 0; i < loops; i++) { + for (i = 0; i < nr_loops; i++) { for (j = 0; j < ctx->num_fds; j++) { int ret, done = 0; @@ -213,7 +213,7 @@ static unsigned int group(pthread_t *pth, /* Create the pipe between client and server */ fdpair(fds); - ctx->num_packets = num_fds * loops; + ctx->num_packets = num_fds * nr_loops; ctx->in_fds[0] = fds[0]; ctx->in_fds[1] = fds[1]; ctx->ready_out = ready_out; @@ -250,7 +250,7 @@ static const struct option options[] = { OPT_BOOLEAN('t', "thread", &thread_mode, "Be multi thread instead of multi process"), OPT_UINTEGER('g', "group", &num_groups, "Specify number of groups"), - OPT_UINTEGER('l', "loop", &loops, "Specify number of loops"), + OPT_UINTEGER('l', "nr_loops", &nr_loops, "Specify the number of loops to run (default: 100)"), OPT_END() }; -- cgit v1.2.3 From 2f211c84ad40469c15226e899b720624fbd28e20 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 19 Oct 2015 10:04:29 +0200 Subject: perf bench mem: Rename 'routine' to 'function' So right now there's a somewhat inconsistent mess of the benchmarking code and options sometimes calling benchmarked functions 'functions', sometimes calling them 'routines'. Name them 'functions' consistently. 
Signed-off-by: Ingo Molnar Cc: David Ahern Cc: Hitoshi Mitake Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1445241870-24854-14-git-send-email-mingo@kernel.org [ Updated perf-bench man page, pointed out by David Ahern ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-bench.txt | 16 ++++----- tools/perf/bench/mem-functions.c | 60 ++++++++++++++++----------------- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt index ddfb3e10e88c..34750fc32714 100644 --- a/tools/perf/Documentation/perf-bench.txt +++ b/tools/perf/Documentation/perf-bench.txt @@ -143,10 +143,10 @@ Options of *memcpy* Specify size of memory to copy (default: 1MB). Available units are B, KB, MB, GB and TB (case insensitive). --r:: ---routine:: -Specify routine to copy (default: default). -Available routines are depend on the architecture. +-f:: +--function:: +Specify function to copy (default: default). +Available functions are depend on the architecture. On x86-64, x86-64-unrolled, x86-64-movsq and x86-64-movsb are supported. -l:: @@ -167,10 +167,10 @@ Options of *memset* Specify size of memory to set (default: 1MB). Available units are B, KB, MB, GB and TB (case insensitive). --r:: ---routine:: -Specify routine to set (default: default). -Available routines are depend on the architecture. +-f:: +--function:: +Specify function to set (default: default). +Available functions are depend on the architecture. On x86-64, x86-64-unrolled, x86-64-stosq and x86-64-stosb are supported. 
-l:: diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c index c5dfabdbd8d0..d1de9c4a7ddf 100644 --- a/tools/perf/bench/mem-functions.c +++ b/tools/perf/bench/mem-functions.c @@ -24,7 +24,7 @@ #define K 1024 static const char *size_str = "1MB"; -static const char *routine_str = "all"; +static const char *function_str = "all"; static int nr_loops = 1; static bool use_cycles; static int cycles_fd; @@ -34,8 +34,8 @@ static const struct option options[] = { "Specify the size of the memory buffers. " "Available units: B, KB, MB, GB and TB (case insensitive)"), - OPT_STRING('r', "routine", &routine_str, "all", - "Specify the routine to run, \"all\" runs all available routines, \"help\" lists them"), + OPT_STRING('f', "function", &function_str, "all", + "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"), OPT_INTEGER('l', "nr_loops", &nr_loops, "Specify the number of loops to run. (default: 1)"), @@ -49,7 +49,7 @@ static const struct option options[] = { typedef void *(*memcpy_t)(void *, const void *, size_t); typedef void *(*memset_t)(void *, int, size_t); -struct routine { +struct function { const char *name; const char *desc; union { @@ -101,19 +101,19 @@ static double timeval2double(struct timeval *ts) } while (0) struct bench_mem_info { - const struct routine *routines; - u64 (*do_cycles)(const struct routine *r, size_t size); - double (*do_gettimeofday)(const struct routine *r, size_t size); + const struct function *functions; + u64 (*do_cycles)(const struct function *r, size_t size); + double (*do_gettimeofday)(const struct function *r, size_t size); const char *const *usage; }; -static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t size, double size_total) +static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total) { - const struct routine *r = &info->routines[r_idx]; + const struct function *r = &info->functions[r_idx]; double 
result_bps = 0.0; u64 result_cycles = 0; - printf("# Routine '%s' (%s)\n", r->name, r->desc); + printf("# function '%s' (%s)\n", r->name, r->desc); if (bench_format == BENCH_FORMAT_DEFAULT) printf("# Copying %s bytes ...\n\n", size_str); @@ -166,28 +166,28 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info * return 1; } - if (!strncmp(routine_str, "all", 3)) { - for (i = 0; info->routines[i].name; i++) - __bench_mem_routine(info, i, size, size_total); + if (!strncmp(function_str, "all", 3)) { + for (i = 0; info->functions[i].name; i++) + __bench_mem_function(info, i, size, size_total); return 0; } - for (i = 0; info->routines[i].name; i++) { - if (!strcmp(info->routines[i].name, routine_str)) + for (i = 0; info->functions[i].name; i++) { + if (!strcmp(info->functions[i].name, function_str)) break; } - if (!info->routines[i].name) { - if (strcmp(routine_str, "help") && strcmp(routine_str, "h")) - printf("Unknown routine: %s\n", routine_str); - printf("Available routines:\n"); - for (i = 0; info->routines[i].name; i++) { + if (!info->functions[i].name) { + if (strcmp(function_str, "help") && strcmp(function_str, "h")) + printf("Unknown function: %s\n", function_str); + printf("Available functions:\n"); + for (i = 0; info->functions[i].name; i++) { printf("\t%s ... 
%s\n", - info->routines[i].name, info->routines[i].desc); + info->functions[i].name, info->functions[i].desc); } return 1; } - __bench_mem_routine(info, i, size, size_total); + __bench_mem_function(info, i, size, size_total); return 0; } @@ -206,7 +206,7 @@ static void memcpy_alloc_mem(void **dst, void **src, size_t size) memset(*src, 0, size); } -static u64 do_memcpy_cycles(const struct routine *r, size_t size) +static u64 do_memcpy_cycles(const struct function *r, size_t size) { u64 cycle_start = 0ULL, cycle_end = 0ULL; void *src = NULL, *dst = NULL; @@ -231,7 +231,7 @@ static u64 do_memcpy_cycles(const struct routine *r, size_t size) return cycle_end - cycle_start; } -static double do_memcpy_gettimeofday(const struct routine *r, size_t size) +static double do_memcpy_gettimeofday(const struct function *r, size_t size) { struct timeval tv_start, tv_end, tv_diff; memcpy_t fn = r->fn.memcpy; @@ -259,7 +259,7 @@ static double do_memcpy_gettimeofday(const struct routine *r, size_t size) return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); } -struct routine memcpy_routines[] = { +struct function memcpy_functions[] = { { .name = "default", .desc = "Default memcpy() provided by glibc", .fn.memcpy = memcpy }, @@ -281,7 +281,7 @@ static const char * const bench_mem_memcpy_usage[] = { int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unused) { struct bench_mem_info info = { - .routines = memcpy_routines, + .functions = memcpy_functions, .do_cycles = do_memcpy_cycles, .do_gettimeofday = do_memcpy_gettimeofday, .usage = bench_mem_memcpy_usage, @@ -297,7 +297,7 @@ static void memset_alloc_mem(void **dst, size_t size) die("memory allocation failed - maybe size is too large?\n"); } -static u64 do_memset_cycles(const struct routine *r, size_t size) +static u64 do_memset_cycles(const struct function *r, size_t size) { u64 cycle_start = 0ULL, cycle_end = 0ULL; memset_t fn = r->fn.memset; @@ -321,7 +321,7 @@ static u64 
do_memset_cycles(const struct routine *r, size_t size) return cycle_end - cycle_start; } -static double do_memset_gettimeofday(const struct routine *r, size_t size) +static double do_memset_gettimeofday(const struct function *r, size_t size) { struct timeval tv_start, tv_end, tv_diff; memset_t fn = r->fn.memset; @@ -352,7 +352,7 @@ static const char * const bench_mem_memset_usage[] = { NULL }; -static const struct routine memset_routines[] = { +static const struct function memset_functions[] = { { .name = "default", .desc = "Default memset() provided by glibc", .fn.memset = memset }, @@ -369,7 +369,7 @@ static const struct routine memset_routines[] = { int bench_mem_memset(int argc, const char **argv, const char *prefix __maybe_unused) { struct bench_mem_info info = { - .routines = memset_routines, + .functions = memset_functions, .do_cycles = do_memset_cycles, .do_gettimeofday = do_memset_gettimeofday, .usage = bench_mem_memset_usage, -- cgit v1.2.3 From aa254af25c40d6d1cdc3f354db29eaf3e85a5ede Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 19 Oct 2015 10:04:30 +0200 Subject: perf bench: Run benchmarks, don't test them So right now we output this text: memcpy: Benchmark for memcpy() functions memset: Benchmark for memset() functions all: Test all memory access benchmarks But the right verb to use with benchmarks is to 'run' them, not 'test' them. 
So change this (and all similar texts) to: memcpy: Benchmark for memcpy() functions memset: Benchmark for memset() functions all: Run all memory access benchmarks Signed-off-by: Ingo Molnar Cc: David Ahern Cc: Hitoshi Mitake Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1445241870-24854-15-git-send-email-mingo@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-bench.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index a8fc948c8ace..b17aed36ca16 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -36,7 +36,7 @@ struct bench { #ifdef HAVE_LIBNUMA_SUPPORT static struct bench numa_benchmarks[] = { { "mem", "Benchmark for NUMA workloads", bench_numa }, - { "all", "Test all NUMA benchmarks", NULL }, + { "all", "Run all NUMA benchmarks", NULL }, { NULL, NULL, NULL } }; #endif @@ -44,14 +44,14 @@ static struct bench numa_benchmarks[] = { static struct bench sched_benchmarks[] = { { "messaging", "Benchmark for scheduling and IPC", bench_sched_messaging }, { "pipe", "Benchmark for pipe() between two processes", bench_sched_pipe }, - { "all", "Test all scheduler benchmarks", NULL }, + { "all", "Run all scheduler benchmarks", NULL }, { NULL, NULL, NULL } }; static struct bench mem_benchmarks[] = { { "memcpy", "Benchmark for memcpy() functions", bench_mem_memcpy }, { "memset", "Benchmark for memset() functions", bench_mem_memset }, - { "all", "Test all memory access benchmarks", NULL }, + { "all", "Run all memory access benchmarks", NULL }, { NULL, NULL, NULL } }; @@ -62,7 +62,7 @@ static struct bench futex_benchmarks[] = { { "requeue", "Benchmark for futex requeue calls", bench_futex_requeue }, /* pi-futexes */ { "lock-pi", "Benchmark for futex lock_pi calls", bench_futex_lock_pi }, - { "all", "Test all futex benchmarks", NULL }, + { "all", "Run all futex benchmarks", NULL 
}, { NULL, NULL, NULL } }; -- cgit v1.2.3 From 3a134ae96ca0af06804d343019b85026486e6fe1 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Thu, 15 Oct 2015 15:39:51 +0800 Subject: perf help: Change 'usage' to 'Usage' for consistency Capitalize 'usage' to make it consistent with all the other occurrences of 'Usage' in the code, e.g., usage_builtin. Signed-off-by: Yunlong Song Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ramkumar Ramachandra Cc: Sriram Raghunathan Cc: Wang Nan Link: http://lkml.kernel.org/r/1444894792-2338-3-git-send-email-yunlong.song@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-help.c | 2 +- tools/perf/util/parse-options.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 36486eade1ef..a7d588bf3cdd 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -463,7 +463,7 @@ int cmd_help(int argc, const char **argv, const char *prefix __maybe_unused) builtin_help_subcommands, builtin_help_usage, 0); if (show_all) { - printf("\n usage: %s\n\n", perf_usage_string); + printf("\n Usage: %s\n\n", perf_usage_string); list_commands("perf commands", &main_cmds, &other_cmds); printf(" %s\n\n", perf_more_info_string); return 0; diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index 9a38b05f0273..8aa7922397a9 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -648,7 +648,7 @@ int usage_with_options_internal(const char * const *usagestr, if (!usagestr) return PARSE_OPT_HELP; - fprintf(stderr, "\n usage: %s\n", *usagestr++); + fprintf(stderr, "\n Usage: %s\n", *usagestr++); while (*usagestr && **usagestr) fprintf(stderr, " or: %s\n", *usagestr++); while (*usagestr) { @@ -684,7 +684,7 @@ int parse_options_usage(const char * const *usagestr, if (!usagestr) goto opt; - fprintf(stderr, "\n usage: %s\n", *usagestr++); + fprintf(stderr, "\n Usage: %s\n", 
*usagestr++); while (*usagestr && **usagestr) fprintf(stderr, " or: %s\n", *usagestr++); while (*usagestr) { -- cgit v1.2.3 From 581cc8a2a2a00afc864840720186b0f6a38079d9 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 16 Oct 2015 12:41:03 +0200 Subject: perf stat: Rename perf_stat struct into perf_stat_evsel It's used as the perf_evsel::priv data, so the name suits better. Also we'll need the perf_stat name free for more generic struct. Signed-off-by: Jiri Olsa Tested-by: Kan Liang Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1444992092-17897-29-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 4 ++-- tools/perf/util/stat.c | 10 +++++----- tools/perf/util/stat.h | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 5ef88f760b12..184057295970 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -434,7 +434,7 @@ static void print_noise_pct(double total, double avg) static void print_noise(struct perf_evsel *evsel, double avg) { - struct perf_stat *ps; + struct perf_stat_evsel *ps; if (run_count == 1) return; @@ -671,7 +671,7 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) static void print_counter_aggr(struct perf_evsel *counter, char *prefix) { FILE *output = stat_config.output; - struct perf_stat *ps = counter->priv; + struct perf_stat_evsel *ps = counter->priv; double avg = avg_stats(&ps->res_stats[0]); int scaled = counter->counts->scaled; double uval; diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 2d065d065b67..93e6d697e574 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -67,7 +67,7 @@ double rel_stddev_stats(double stddev, double avg) bool __perf_evsel_stat__is(struct perf_evsel *evsel, enum perf_stat_evsel_id id) { - struct perf_stat *ps = evsel->priv; + struct perf_stat_evsel *ps = evsel->priv; return 
ps->id == id; } @@ -84,7 +84,7 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = { void perf_stat_evsel_id_init(struct perf_evsel *evsel) { - struct perf_stat *ps = evsel->priv; + struct perf_stat_evsel *ps = evsel->priv; int i; /* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */ @@ -100,7 +100,7 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel) void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) { int i; - struct perf_stat *ps = evsel->priv; + struct perf_stat_evsel *ps = evsel->priv; for (i = 0; i < 3; i++) init_stats(&ps->res_stats[i]); @@ -110,7 +110,7 @@ void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) { - evsel->priv = zalloc(sizeof(struct perf_stat)); + evsel->priv = zalloc(sizeof(struct perf_stat_evsel)); if (evsel->priv == NULL) return -ENOMEM; perf_evsel__reset_stat_priv(evsel); @@ -304,7 +304,7 @@ int perf_stat_process_counter(struct perf_stat_config *config, struct perf_evsel *counter) { struct perf_counts_values *aggr = &counter->counts->aggr; - struct perf_stat *ps = counter->priv; + struct perf_stat_evsel *ps = counter->priv; u64 *count = counter->counts->aggr.values; int i, ret; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 62448c8175d3..f8d9d5ce24a9 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -20,7 +20,7 @@ enum perf_stat_evsel_id { PERF_STAT_EVSEL_ID__MAX, }; -struct perf_stat { +struct perf_stat_evsel { struct stats res_stats[3]; enum perf_stat_evsel_id id; }; -- cgit v1.2.3 From 208df99ed07ca5e86ee41617e0384930fc9ca819 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 16 Oct 2015 12:41:04 +0200 Subject: perf stat: Add AGGR_UNSET mode Adding AGGR_UNSET mode, so we could distinguish unset aggr_mode in following patches. 
Signed-off-by: Jiri Olsa Tested-by: Kan Liang Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1444992092-17897-30-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 5 +++++ tools/perf/util/stat.c | 1 + tools/perf/util/stat.h | 1 + 3 files changed, 7 insertions(+) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 184057295970..abeb15aebd12 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -479,6 +479,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) csv_sep); break; case AGGR_GLOBAL: + case AGGR_UNSET: default: break; } @@ -799,6 +800,8 @@ static void print_interval(char *prefix, struct timespec *ts) case AGGR_GLOBAL: default: fprintf(output, "# time counts %*s events\n", unit_width, "unit"); + case AGGR_UNSET: + break; } } @@ -880,6 +883,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) evlist__for_each(evsel_list, counter) print_counter(counter, prefix); break; + case AGGR_UNSET: default: break; } @@ -960,6 +964,7 @@ static int perf_stat_init_aggr_mode(void) case AGGR_NONE: case AGGR_GLOBAL: case AGGR_THREAD: + case AGGR_UNSET: default: break; } diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 93e6d697e574..837374181ec7 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -272,6 +272,7 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel aggr->ena += count->ena; aggr->run += count->run; } + case AGGR_UNSET: default: break; } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index f8d9d5ce24a9..da1d11c4f8c1 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -31,6 +31,7 @@ enum aggr_mode { AGGR_SOCKET, AGGR_CORE, AGGR_THREAD, + AGGR_UNSET, }; struct perf_stat_config { -- cgit v1.2.3 From f1cbb8f35719e36803f226d1bbf08ac12cedcd76 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 16 Oct 
2015 12:41:14 +0200 Subject: perf cpu_map: Make cpu_map__build_map global We'll need to call it from perf stat in the stat_script patchkit. Signed-off-by: Jiri Olsa Tested-by: Kan Liang Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1444992092-17897-40-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cpumap.c | 4 ++-- tools/perf/util/cpumap.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index c51c29fd0732..70ec8d031f9d 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -258,8 +258,8 @@ static int cmp_ids(const void *a, const void *b) return *(int *)a - *(int *)b; } -static int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res, - int (*f)(struct cpu_map *map, int cpu)) +int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res, + int (*f)(struct cpu_map *map, int cpu)) { struct cpu_map *c; int nr = cpus->nr; diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 8982d538da83..6e36fc35eca6 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -87,4 +87,6 @@ static inline int cpu__get_node(int cpu) return cpunode_map[cpu]; } +int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res, + int (*f)(struct cpu_map *map, int cpu)); #endif /* __PERF_CPUMAP_H */ -- cgit v1.2.3 From 1fe7a30028eeccd92e6fccfbeb8c5c3811b11b64 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 16 Oct 2015 12:41:15 +0200 Subject: perf cpu_map: Add data arg to cpu_map__build_map callback Add a data arg to the cpu_map__build_map callback, so that we can pass data along to the callback. It'll be needed in the following patches to retrieve topology info from perf.data. 
Signed-off-by: Jiri Olsa Tested-by: Kan Liang Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1444992092-17897-41-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 14 ++++++++++++-- tools/perf/tests/topology.c | 4 ++-- tools/perf/util/cpumap.c | 15 ++++++++------- tools/perf/util/cpumap.h | 7 ++++--- tools/perf/util/stat.c | 2 +- 5 files changed, 27 insertions(+), 15 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index abeb15aebd12..91e793a76929 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -944,6 +944,16 @@ static int stat__set_big_num(const struct option *opt __maybe_unused, return 0; } +static int perf_stat__get_socket(struct cpu_map *map, int cpu) +{ + return cpu_map__get_socket(map, cpu, NULL); +} + +static int perf_stat__get_core(struct cpu_map *map, int cpu) +{ + return cpu_map__get_core(map, cpu, NULL); +} + static int perf_stat_init_aggr_mode(void) { switch (stat_config.aggr_mode) { @@ -952,14 +962,14 @@ static int perf_stat_init_aggr_mode(void) perror("cannot build socket map"); return -1; } - aggr_get_id = cpu_map__get_socket; + aggr_get_id = perf_stat__get_socket; break; case AGGR_CORE: if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) { perror("cannot build core map"); return -1; } - aggr_get_id = cpu_map__get_core; + aggr_get_id = perf_stat__get_core; break; case AGGR_NONE: case AGGR_GLOBAL: diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index c3aff53a976a..f5bb096c3bd9 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -73,10 +73,10 @@ static int check_cpu_topology(char *path, struct cpu_map *map) for (i = 0; i < map->nr; i++) { TEST_ASSERT_VAL("Core ID doesn't match", - (session->header.env.cpu[map->map[i]].core_id == (cpu_map__get_core(map, i) & 0xffff))); + (session->header.env.cpu[map->map[i]].core_id == (cpu_map__get_core(map, i, NULL) & 
0xffff))); TEST_ASSERT_VAL("Socket ID doesn't match", - (session->header.env.cpu[map->map[i]].socket_id == cpu_map__get_socket(map, i))); + (session->header.env.cpu[map->map[i]].socket_id == cpu_map__get_socket(map, i, NULL))); } perf_session__delete(session); diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 70ec8d031f9d..b36845347f01 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -241,7 +241,7 @@ int cpu_map__get_socket_id(int cpu) return ret ?: value; } -int cpu_map__get_socket(struct cpu_map *map, int idx) +int cpu_map__get_socket(struct cpu_map *map, int idx, void *data __maybe_unused) { int cpu; @@ -259,7 +259,8 @@ static int cmp_ids(const void *a, const void *b) } int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res, - int (*f)(struct cpu_map *map, int cpu)) + int (*f)(struct cpu_map *map, int cpu, void *data), + void *data) { struct cpu_map *c; int nr = cpus->nr; @@ -271,7 +272,7 @@ int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res, return -1; for (cpu = 0; cpu < nr; cpu++) { - s1 = f(cpus, cpu); + s1 = f(cpus, cpu, data); for (s2 = 0; s2 < c->nr; s2++) { if (s1 == c->map[s2]) break; @@ -295,7 +296,7 @@ int cpu_map__get_core_id(int cpu) return ret ?: value; } -int cpu_map__get_core(struct cpu_map *map, int idx) +int cpu_map__get_core(struct cpu_map *map, int idx, void *data) { int cpu, s; @@ -306,7 +307,7 @@ int cpu_map__get_core(struct cpu_map *map, int idx) cpu = cpu_map__get_core_id(cpu); - s = cpu_map__get_socket(map, idx); + s = cpu_map__get_socket(map, idx, data); if (s == -1) return -1; @@ -321,12 +322,12 @@ int cpu_map__get_core(struct cpu_map *map, int idx) int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp) { - return cpu_map__build_map(cpus, sockp, cpu_map__get_socket); + return cpu_map__build_map(cpus, sockp, cpu_map__get_socket, NULL); } int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep) { - return cpu_map__build_map(cpus, 
corep, cpu_map__get_core); + return cpu_map__build_map(cpus, corep, cpu_map__get_core, NULL); } /* setup simple routines to easily access node numbers given a cpu number */ diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 6e36fc35eca6..f1bcd2cfa164 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -19,9 +19,9 @@ struct cpu_map *cpu_map__dummy_new(void); struct cpu_map *cpu_map__read(FILE *file); size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); int cpu_map__get_socket_id(int cpu); -int cpu_map__get_socket(struct cpu_map *map, int idx); +int cpu_map__get_socket(struct cpu_map *map, int idx, void *data); int cpu_map__get_core_id(int cpu); -int cpu_map__get_core(struct cpu_map *map, int idx); +int cpu_map__get_core(struct cpu_map *map, int idx, void *data); int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp); int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep); @@ -88,5 +88,6 @@ static inline int cpu__get_node(int cpu) } int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res, - int (*f)(struct cpu_map *map, int cpu)); + int (*f)(struct cpu_map *map, int cpu, void *data), + void *data); #endif /* __PERF_CPUMAP_H */ diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 837374181ec7..2d9d8306dbd3 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -230,7 +230,7 @@ static int check_per_pkg(struct perf_evsel *counter, if (!(vals->run && vals->ena)) return 0; - s = cpu_map__get_socket(cpus, cpu); + s = cpu_map__get_socket(cpus, cpu, NULL); if (s < 0) return -1; -- cgit v1.2.3 From d2b5a315ae84d235f00761468885c466f81d7805 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 16 Oct 2015 12:41:25 +0200 Subject: perf script: Check output fields only for samples There's no need to check sampling output fields for events without perf_event_attr::sample_type field set. 
Signed-off-by: Jiri Olsa Tested-by: Kan Liang Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1444992092-17897-51-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 8ce1c6bbfa45..2653c0273b89 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -686,7 +686,10 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, set_print_ip_opts(&evsel->attr); - return perf_evsel__check_attr(evsel, scr->session); + if (evsel->attr.sample_type) + err = perf_evsel__check_attr(evsel, scr->session); + + return err; } static int process_comm_event(struct perf_tool *tool, -- cgit v1.2.3 From a4c6a3e8bbb675a601f529881c51ff226f83c3f1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2015 18:17:25 -0300 Subject: perf bench: Use named initializers in the trailer too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To avoid this splat with gcc 4.4.7: cc1: warnings being treated as errors bench/mem-functions.c:273: error: missing initializer bench/mem-functions.c:273: error: (near initialization for ‘memcpy_functions[4].desc’) bench/mem-functions.c:366: error: missing initializer bench/mem-functions.c:366: error: (near initialization for ‘memset_functions[4].desc’) Cc: David Ahern Cc: Hitoshi Mitake Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/n/tip-0s8o6tgw1pdwvdv02llb9tkd@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/mem-functions.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c index d1de9c4a7ddf..9419b944220f 100644 --- a/tools/perf/bench/mem-functions.c +++ 
b/tools/perf/bench/mem-functions.c @@ -270,7 +270,7 @@ struct function memcpy_functions[] = { # undef MEMCPY_FN #endif - { NULL, } + { .name = NULL, } }; static const char * const bench_mem_memcpy_usage[] = { @@ -363,7 +363,7 @@ static const struct function memset_functions[] = { # undef MEMSET_FN #endif - { NULL, } + { .name = NULL, } }; int bench_mem_memset(int argc, const char **argv, const char *prefix __maybe_unused) -- cgit v1.2.3