diff options
author | Ingo Molnar <mingo@kernel.org> | 2016-10-24 20:42:42 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2016-10-24 20:42:42 +0200 |
commit | 76e2d2617d767c445498c4c4b1162eb2201cdd77 (patch) | |
tree | e03764dba70ea6993366e25d16e1735b2d40cd26 /tools/perf/bench | |
parent | e9c848928abf4cb60601e9ae7d336f0333c98bca (diff) | |
parent | 04b553ad7dc347eabd3cb4705932272453175a80 (diff) | |
download | linux-76e2d2617d767c445498c4c4b1162eb2201cdd77.tar.bz2 |
Merge tag 'perf-core-for-mingo-20161024' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:
New features:
- Dynamically change verbosity level by pressing 'V' in the 'perf top/report'
hists TUI browser (Alexis Berlemont)
- Implement 'perf trace --delay' in the same fashion as in 'perf record --delay',
to skip sampling workload initialization events (Alexis Berlemont)
- Make vendor named events case insensitive in 'perf list', i.e.
'perf list LONGEST_LAT' works just the same as 'perf list longest_lat' (Andi Kleen)
- Show instruction bytes and length in 'perf script' for Intel PT and BTS (Andi Kleen, Adrian Hunter)
E.g:
% perf record -e intel_pt// foo
% perf script --itrace=i0ns -F ip,insn,insnlen
ffffffff8101232f ilen: 5 insn: 0f 1f 44 00 00
ffffffff81012334 ilen: 1 insn: 5b
ffffffff81012335 ilen: 1 insn: 5d
ffffffff81012336 ilen: 1 insn: c3
ffffffff810123e3 ilen: 1 insn: 5b
ffffffff810123e4 ilen: 2 insn: 41 5c
ffffffff810123e6 ilen: 1 insn: 5d
ffffffff810123e7 ilen: 1 insn: c3
ffffffff810124a6 ilen: 2 insn: 31 c0
ffffffff810124a8 ilen: 9 insn: 41 83 bc 24 a8 01 00 00 01
ffffffff810124b1 ilen: 2 insn: 75 87
- Allow enabling the perf_event_attr.branch_type attribute member: (Andi Kleen)
perf record -e sched:sched_switch,cpu/cpu-cycles,branch_type=any/ ...
- Add unwinding support for jitdump (Stefano Sanfilippo)
Fixes:
- Use raw_syscall:sys_enter timestamp in 'perf trace' (Arnaldo Carvalho de Melo)
Infrastructure:
- Allow jitdump to be built without libdwarf (Maciej Debski)
- Sync x86's syscall table tools/ copy (Arnaldo Carvalho de Melo)
- Fixes to avoid calling die() in library functions already propagating other
errors (Arnaldo Carvalho de Melo)
- Improvements to allow libtraceevent to be properly installed in distro
packages (Jiri Olsa)
- Removing coresight miscellaneous debug output (Mathieu Poirier)
- Cache align the 'perf bench futex' worker struct (Sebastian Andrzej Siewior)
Documentation:
- Minor improvements on the documentation of event parameters (Andi Kleen)
- Add jitdump format specification document (Stephane Eranian)
Spelling fixes:
- Fix typo "No enough" to "Not enough" (Alexander Alemayhu)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools/perf/bench')
-rw-r--r-- | tools/perf/bench/futex-hash.c | 5 | ||||
-rw-r--r-- | tools/perf/bench/mem-functions.c | 77 |
2 files changed, 34 insertions, 48 deletions
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 8024cd5febd2..d9e5e80bb4d0 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -39,12 +39,15 @@ static unsigned int threads_starting; static struct stats throughput_stats; static pthread_cond_t thread_parent, thread_worker; +#define SMP_CACHE_BYTES 256 +#define __cacheline_aligned __attribute__ ((aligned (SMP_CACHE_BYTES))) + struct worker { int tid; u_int32_t *futex; pthread_t thread; unsigned long ops; -}; +} __cacheline_aligned; static const struct option options[] = { OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c index c684910e5a48..52504a83b5a1 100644 --- a/tools/perf/bench/mem-functions.c +++ b/tools/perf/bench/mem-functions.c @@ -106,9 +106,10 @@ static double timeval2double(struct timeval *ts) struct bench_mem_info { const struct function *functions; - u64 (*do_cycles)(const struct function *r, size_t size); - double (*do_gettimeofday)(const struct function *r, size_t size); + u64 (*do_cycles)(const struct function *r, size_t size, void *src, void *dst); + double (*do_gettimeofday)(const struct function *r, size_t size, void *src, void *dst); const char *const *usage; + bool alloc_src; }; static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total) @@ -116,16 +117,26 @@ static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t const struct function *r = &info->functions[r_idx]; double result_bps = 0.0; u64 result_cycles = 0; + void *src = NULL, *dst = zalloc(size); printf("# function '%s' (%s)\n", r->name, r->desc); + if (dst == NULL) + goto out_alloc_failed; + + if (info->alloc_src) { + src = zalloc(size); + if (src == NULL) + goto out_alloc_failed; + } + if (bench_format == BENCH_FORMAT_DEFAULT) printf("# Copying %s bytes ...\n\n", size_str); if (use_cycles) { - result_cycles = 
info->do_cycles(r, size); + result_cycles = info->do_cycles(r, size, src, dst); } else { - result_bps = info->do_gettimeofday(r, size); + result_bps = info->do_gettimeofday(r, size, src, dst); } switch (bench_format) { @@ -149,6 +160,14 @@ static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t BUG_ON(1); break; } + +out_free: + free(src); + free(dst); + return; +out_alloc_failed: + printf("# Memory allocation failed - maybe size (%s) is too large?\n", size_str); + goto out_free; } static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info) @@ -201,28 +220,14 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info * return 0; } -static void memcpy_alloc_mem(void **dst, void **src, size_t size) -{ - *dst = zalloc(size); - if (!*dst) - die("memory allocation failed - maybe size is too large?\n"); - - *src = zalloc(size); - if (!*src) - die("memory allocation failed - maybe size is too large?\n"); - - /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */ - memset(*src, 0, size); -} - -static u64 do_memcpy_cycles(const struct function *r, size_t size) +static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst) { u64 cycle_start = 0ULL, cycle_end = 0ULL; - void *src = NULL, *dst = NULL; memcpy_t fn = r->fn.memcpy; int i; - memcpy_alloc_mem(&dst, &src, size); + /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */ + memset(src, 0, size); /* * We prefault the freshly allocated memory range here, @@ -235,20 +240,15 @@ static u64 do_memcpy_cycles(const struct function *r, size_t size) fn(dst, src, size); cycle_end = get_cycles(); - free(src); - free(dst); return cycle_end - cycle_start; } -static double do_memcpy_gettimeofday(const struct function *r, size_t size) +static double do_memcpy_gettimeofday(const struct function *r, size_t size, void *src, void *dst) { struct timeval tv_start, tv_end, tv_diff; memcpy_t fn = 
r->fn.memcpy; - void *src = NULL, *dst = NULL; int i; - memcpy_alloc_mem(&dst, &src, size); - /* * We prefault the freshly allocated memory range here, * to not measure page fault overhead: @@ -262,9 +262,6 @@ static double do_memcpy_gettimeofday(const struct function *r, size_t size) timersub(&tv_end, &tv_start, &tv_diff); - free(src); - free(dst); - return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); } @@ -294,27 +291,18 @@ int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unu .do_cycles = do_memcpy_cycles, .do_gettimeofday = do_memcpy_gettimeofday, .usage = bench_mem_memcpy_usage, + .alloc_src = true, }; return bench_mem_common(argc, argv, &info); } -static void memset_alloc_mem(void **dst, size_t size) -{ - *dst = zalloc(size); - if (!*dst) - die("memory allocation failed - maybe size is too large?\n"); -} - -static u64 do_memset_cycles(const struct function *r, size_t size) +static u64 do_memset_cycles(const struct function *r, size_t size, void *src __maybe_unused, void *dst) { u64 cycle_start = 0ULL, cycle_end = 0ULL; memset_t fn = r->fn.memset; - void *dst = NULL; int i; - memset_alloc_mem(&dst, size); - /* * We prefault the freshly allocated memory range here, * to not measure page fault overhead: @@ -326,19 +314,15 @@ static u64 do_memset_cycles(const struct function *r, size_t size) fn(dst, i, size); cycle_end = get_cycles(); - free(dst); return cycle_end - cycle_start; } -static double do_memset_gettimeofday(const struct function *r, size_t size) +static double do_memset_gettimeofday(const struct function *r, size_t size, void *src __maybe_unused, void *dst) { struct timeval tv_start, tv_end, tv_diff; memset_t fn = r->fn.memset; - void *dst = NULL; int i; - memset_alloc_mem(&dst, size); - /* * We prefault the freshly allocated memory range here, * to not measure page fault overhead: @@ -352,7 +336,6 @@ static double do_memset_gettimeofday(const struct function *r, size_t size) timersub(&tv_end, &tv_start, 
&tv_diff); - free(dst); return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); } |