diff options
author | Ingo Molnar <mingo@kernel.org> | 2015-03-04 06:33:49 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2015-03-04 06:36:15 +0100 |
commit | f8e92fb4b0ffc4d62279ab39f34e798e37e90b0b (patch) | |
tree | 9caa8df664792e64ddcb4ea03fd418a8a529c82e /tools/perf/bench | |
parent | d2c032e3dc58137a7261a7824d3acce435db1d66 (diff) | |
parent | dfecb95cdfeaf7872d83a96bec3a606e9cd95c8d (diff) | |
download | linux-f8e92fb4b0ffc4d62279ab39f34e798e37e90b0b.tar.bz2 |
Merge tag 'alternatives_padding' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp into x86/asm
Pull alternative instructions framework improvements from Borislav Petkov:
"A more involved rework of the alternatives framework to be able to
pad instructions and thus make using the alternatives macros more
straightforward: instead of having to figure out old and new instruction
sizes by hand, we let the toolchain figure that out for us.
Furthermore, it optimizes the JMPs used: where possible they are replaced
with smaller versions, which relieves instruction fetch and decode.
Some stats:
x86_64 defconfig:
Alternatives sites total: 2478
Total padding added (in Bytes): 6051
The padding is currently done for:
X86_FEATURE_ALWAYS
X86_FEATURE_ERMS
X86_FEATURE_LFENCE_RDTSC
X86_FEATURE_MFENCE_RDTSC
X86_FEATURE_SMAP
This is with the latest version of the patchset. Of course, on each
machine the alternatives sites actually being patched are a proper
subset of the total number."
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools/perf/bench')
-rw-r--r-- | tools/perf/bench/mem-memcpy-x86-64-asm-def.h | 6 | ||||
-rw-r--r-- | tools/perf/bench/mem-memcpy-x86-64-asm.S | 2 | ||||
-rw-r--r-- | tools/perf/bench/mem-memcpy.c | 128 | ||||
-rw-r--r-- | tools/perf/bench/mem-memset-x86-64-asm-def.h | 6 | ||||
-rw-r--r-- | tools/perf/bench/mem-memset-x86-64-asm.S | 2 |
5 files changed, 72 insertions, 72 deletions
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h index d66ab799b35f..8c0c1a2770c8 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h +++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h @@ -1,12 +1,12 @@ -MEMCPY_FN(__memcpy, +MEMCPY_FN(memcpy_orig, "x86-64-unrolled", "unrolled memcpy() in arch/x86/lib/memcpy_64.S") -MEMCPY_FN(memcpy_c, +MEMCPY_FN(__memcpy, "x86-64-movsq", "movsq-based memcpy() in arch/x86/lib/memcpy_64.S") -MEMCPY_FN(memcpy_c_e, +MEMCPY_FN(memcpy_erms, "x86-64-movsb", "movsb-based memcpy() in arch/x86/lib/memcpy_64.S") diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S index fcd9cf00600a..e4c2c30143b9 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm.S +++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S @@ -1,8 +1,6 @@ #define memcpy MEMCPY /* don't hide glibc's memcpy() */ #define altinstr_replacement text #define globl p2align 4; .globl -#define Lmemcpy_c globl memcpy_c; memcpy_c -#define Lmemcpy_c_e globl memcpy_c_e; memcpy_c_e #include "../../../arch/x86/lib/memcpy_64.S" /* * We need to provide note.GNU-stack section, saying that we want diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index db1d3a29d97f..d3dfb7936dcd 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c @@ -36,7 +36,7 @@ static const struct option options[] = { "Specify length of memory to copy. 
" "Available units: B, KB, MB, GB and TB (upper and lower)"), OPT_STRING('r', "routine", &routine, "default", - "Specify routine to copy"), + "Specify routine to copy, \"all\" runs all available routines"), OPT_INTEGER('i', "iterations", &iterations, "repeat memcpy() invocation this number of times"), OPT_BOOLEAN('c', "cycle", &use_cycle, @@ -135,55 +135,16 @@ struct bench_mem_info { const char *const *usage; }; -static int bench_mem_common(int argc, const char **argv, - const char *prefix __maybe_unused, - struct bench_mem_info *info) +static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t len, double totallen) { - int i; - size_t len; - double totallen; + const struct routine *r = &info->routines[r_idx]; double result_bps[2]; u64 result_cycle[2]; - argc = parse_options(argc, argv, options, - info->usage, 0); - - if (no_prefault && only_prefault) { - fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n"); - return 1; - } - - if (use_cycle) - init_cycle(); - - len = (size_t)perf_atoll((char *)length_str); - totallen = (double)len * iterations; - result_cycle[0] = result_cycle[1] = 0ULL; result_bps[0] = result_bps[1] = 0.0; - if ((s64)len <= 0) { - fprintf(stderr, "Invalid length:%s\n", length_str); - return 1; - } - - /* same to without specifying either of prefault and no-prefault */ - if (only_prefault && no_prefault) - only_prefault = no_prefault = false; - - for (i = 0; info->routines[i].name; i++) { - if (!strcmp(info->routines[i].name, routine)) - break; - } - if (!info->routines[i].name) { - printf("Unknown routine:%s\n", routine); - printf("Available routines...\n"); - for (i = 0; info->routines[i].name; i++) { - printf("\t%s ... 
%s\n", - info->routines[i].name, info->routines[i].desc); - } - return 1; - } + printf("Routine %s (%s)\n", r->name, r->desc); if (bench_format == BENCH_FORMAT_DEFAULT) printf("# Copying %s Bytes ...\n\n", length_str); @@ -191,28 +152,17 @@ static int bench_mem_common(int argc, const char **argv, if (!only_prefault && !no_prefault) { /* show both of results */ if (use_cycle) { - result_cycle[0] = - info->do_cycle(&info->routines[i], len, false); - result_cycle[1] = - info->do_cycle(&info->routines[i], len, true); + result_cycle[0] = info->do_cycle(r, len, false); + result_cycle[1] = info->do_cycle(r, len, true); } else { - result_bps[0] = - info->do_gettimeofday(&info->routines[i], - len, false); - result_bps[1] = - info->do_gettimeofday(&info->routines[i], - len, true); + result_bps[0] = info->do_gettimeofday(r, len, false); + result_bps[1] = info->do_gettimeofday(r, len, true); } } else { - if (use_cycle) { - result_cycle[pf] = - info->do_cycle(&info->routines[i], - len, only_prefault); - } else { - result_bps[pf] = - info->do_gettimeofday(&info->routines[i], - len, only_prefault); - } + if (use_cycle) + result_cycle[pf] = info->do_cycle(r, len, only_prefault); + else + result_bps[pf] = info->do_gettimeofday(r, len, only_prefault); } switch (bench_format) { @@ -265,6 +215,60 @@ static int bench_mem_common(int argc, const char **argv, die("unknown format: %d\n", bench_format); break; } +} + +static int bench_mem_common(int argc, const char **argv, + const char *prefix __maybe_unused, + struct bench_mem_info *info) +{ + int i; + size_t len; + double totallen; + + argc = parse_options(argc, argv, options, + info->usage, 0); + + if (no_prefault && only_prefault) { + fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n"); + return 1; + } + + if (use_cycle) + init_cycle(); + + len = (size_t)perf_atoll((char *)length_str); + totallen = (double)len * iterations; + + if ((s64)len <= 0) { + fprintf(stderr, "Invalid length:%s\n", length_str); + return 1; + } 
+ + /* same to without specifying either of prefault and no-prefault */ + if (only_prefault && no_prefault) + only_prefault = no_prefault = false; + + if (!strncmp(routine, "all", 3)) { + for (i = 0; info->routines[i].name; i++) + __bench_mem_routine(info, i, len, totallen); + return 0; + } + + for (i = 0; info->routines[i].name; i++) { + if (!strcmp(info->routines[i].name, routine)) + break; + } + if (!info->routines[i].name) { + printf("Unknown routine:%s\n", routine); + printf("Available routines...\n"); + for (i = 0; info->routines[i].name; i++) { + printf("\t%s ... %s\n", + info->routines[i].name, info->routines[i].desc); + } + return 1; + } + + __bench_mem_routine(info, i, len, totallen); return 0; } diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h index a71dff97c1f5..f02d028771d9 100644 --- a/tools/perf/bench/mem-memset-x86-64-asm-def.h +++ b/tools/perf/bench/mem-memset-x86-64-asm-def.h @@ -1,12 +1,12 @@ -MEMSET_FN(__memset, +MEMSET_FN(memset_orig, "x86-64-unrolled", "unrolled memset() in arch/x86/lib/memset_64.S") -MEMSET_FN(memset_c, +MEMSET_FN(__memset, "x86-64-stosq", "movsq-based memset() in arch/x86/lib/memset_64.S") -MEMSET_FN(memset_c_e, +MEMSET_FN(memset_erms, "x86-64-stosb", "movsb-based memset() in arch/x86/lib/memset_64.S") diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S index 9e5af89ed13a..de278784c866 100644 --- a/tools/perf/bench/mem-memset-x86-64-asm.S +++ b/tools/perf/bench/mem-memset-x86-64-asm.S @@ -1,8 +1,6 @@ #define memset MEMSET /* don't hide glibc's memset() */ #define altinstr_replacement text #define globl p2align 4; .globl -#define Lmemset_c globl memset_c; memset_c -#define Lmemset_c_e globl memset_c_e; memset_c_e #include "../../../arch/x86/lib/memset_64.S" /* |