diff options
-rw-r--r-- | Makefile | 1 | ||||
-rw-r--r-- | arch/s390/include/asm/ftrace.h | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/alternative-asm.h | 9 | ||||
-rw-r--r-- | arch/x86/include/asm/cpufeature.h | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/ftrace.h | 7 | ||||
-rw-r--r-- | arch/x86/include/asm/setup.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/uaccess.h | 2 | ||||
-rw-r--r-- | arch/x86/kernel/alternative.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/intel.c | 19 | ||||
-rw-r--r-- | arch/x86/lib/clear_page_64.S | 33 | ||||
-rw-r--r-- | arch/x86/lib/copy_user_64.S | 69 | ||||
-rw-r--r-- | arch/x86/lib/memcpy_64.S | 45 | ||||
-rw-r--r-- | arch/x86/lib/memmove_64.S | 29 | ||||
-rw-r--r-- | arch/x86/lib/memset_64.S | 54 | ||||
-rw-r--r-- | include/linux/init.h | 14 | ||||
-rw-r--r-- | scripts/Makefile.build | 12 | ||||
-rw-r--r-- | scripts/recordmcount.c | 168 | ||||
-rw-r--r-- | scripts/recordmcount.h | 174 | ||||
-rwxr-xr-x | scripts/recordmcount.pl | 5 | ||||
-rw-r--r-- | tools/perf/builtin-stat.c | 347 | ||||
-rw-r--r-- | tools/perf/util/include/asm/alternative-asm.h | 8 |
22 files changed, 796 insertions, 219 deletions
@@ -1268,6 +1268,7 @@ help: @echo ' make C=1 [targets] Check all c source with $$CHECK (sparse by default)' @echo ' make C=2 [targets] Force check of all c source with $$CHECK' @echo ' make W=1 [targets] Enable extra gcc checks' + @echo ' make RECORDMCOUNT_WARN=1 [targets] Warn about ignored mcount sections' @echo '' @echo 'Execute "make" or "make all" to build all targets marked with [*] ' @echo 'For further info see the ./README file' diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 3c29be4836ed..b7931faaef6d 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -11,15 +11,13 @@ struct dyn_arch_ftrace { }; #ifdef CONFIG_64BIT #define MCOUNT_INSN_SIZE 12 -#define MCOUNT_OFFSET 8 #else #define MCOUNT_INSN_SIZE 20 -#define MCOUNT_OFFSET 4 #endif static inline unsigned long ftrace_call_adjust(unsigned long addr) { - return addr - MCOUNT_OFFSET; + return addr; } #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index a63a68be1cce..94d420b360d1 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -15,4 +15,13 @@ .endm #endif +.macro altinstruction_entry orig alt feature orig_len alt_len + .align 8 + .quad \orig + .quad \alt + .word \feature + .byte \orig_len + .byte \alt_len +.endm + #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 91f3e087cf21..7f2f7b123293 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -195,6 +195,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ #define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ +#define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */ #if defined(__KERNEL__) && !defined(__ASSEMBLY__) diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 
db24c2278be0..268c783ab1c0 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -38,11 +38,10 @@ extern void mcount(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) { /* - * call mcount is "e8 <4 byte offset>" - * The addr points to the 4 byte offset and the caller of this - * function wants the pointer to e8. Simply subtract one. + * addr is the address of the mcount call instruction. + * recordmcount does the necessary offset calculation. */ - return addr - 1; + return addr; } #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index db8aa19a08a2..647d8a06ce4f 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -88,7 +88,7 @@ void *extend_brk(size_t size, size_t align); * executable.) */ #define RESERVE_BRK(name,sz) \ - static void __section(.discard.text) __used \ + static void __section(.discard.text) __used notrace \ __brk_reservation_fn_##name##__(void) { \ asm volatile ( \ ".pushsection .brk_reservation,\"aw\",@nobits;" \ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index abd3e0ea762a..99f0ad753f32 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -42,7 +42,7 @@ * Returns 0 if the range is valid, nonzero otherwise. * * This is equivalent to the following test: - * (u33)addr + (u33)size >= (u33)current->addr_limit.seg (u65 for x86_64) + * (u33)addr + (u33)size > (u33)current->addr_limit.seg (u65 for x86_64) * * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry... 
*/ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 651454b0c811..1eeeafcb4410 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -210,6 +210,15 @@ void __init_or_module apply_alternatives(struct alt_instr *start, u8 insnbuf[MAX_PATCH_LEN]; DPRINTK("%s: alt table %p -> %p\n", __func__, start, end); + /* + * The scan order should be from start to end. A later scanned + * alternative code can overwrite a previous scanned alternative code. + * Some kernel functions (e.g. memcpy, memset, etc) use this order to + * patch code. + * + * So be careful if you want to change the scan order to any other + * order. + */ for (a = start; a < end; a++) { u8 *instr = a->instr; BUG_ON(a->replacementlen > a->instrlen); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e2ced0074a45..173f3a3fa1a6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -565,8 +565,7 @@ void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); - if (eax > 0) - c->x86_capability[9] = ebx; + c->x86_capability[9] = ebx; } /* AMD-defined flags: level 0x80000001 */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index df86bc8c859d..fc73a34ba8c9 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -29,10 +29,10 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) { + u64 misc_enable; + /* Unmask CPUID levels if masked: */ if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { - u64 misc_enable; - rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) { @@ -118,8 +118,6 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) * (model 2) with the same problem. 
*/ if (c->x86 == 15) { - u64 misc_enable; - rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) { @@ -130,6 +128,19 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) } } #endif + + /* + * If fast string is not enabled in IA32_MISC_ENABLE for any reason, + * clear the fast string and enhanced fast string CPU capabilities. + */ + if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { + rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); + if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) { + printk(KERN_INFO "Disabled fast string operations\n"); + setup_clear_cpu_cap(X86_FEATURE_REP_GOOD); + setup_clear_cpu_cap(X86_FEATURE_ERMS); + } + } } #ifdef CONFIG_X86_32 diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S index aa4326bfb24a..f2145cfa12a6 100644 --- a/arch/x86/lib/clear_page_64.S +++ b/arch/x86/lib/clear_page_64.S @@ -1,5 +1,6 @@ #include <linux/linkage.h> #include <asm/dwarf2.h> +#include <asm/alternative-asm.h> /* * Zero a page. @@ -14,6 +15,15 @@ ENTRY(clear_page_c) CFI_ENDPROC ENDPROC(clear_page_c) +ENTRY(clear_page_c_e) + CFI_STARTPROC + movl $4096,%ecx + xorl %eax,%eax + rep stosb + ret + CFI_ENDPROC +ENDPROC(clear_page_c_e) + ENTRY(clear_page) CFI_STARTPROC xorl %eax,%eax @@ -38,21 +48,26 @@ ENTRY(clear_page) .Lclear_page_end: ENDPROC(clear_page) - /* Some CPUs run faster using the string instructions. - It is also a lot simpler. Use this when possible */ + /* + * Some CPUs support enhanced REP MOVSB/STOSB instructions. + * It is recommended to use this when possible. + * If enhanced REP MOVSB/STOSB is not available, try to use fast string. + * Otherwise, use original function. 
+ * + */ #include <asm/cpufeature.h> .section .altinstr_replacement,"ax" 1: .byte 0xeb /* jmp <disp8> */ .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */ -2: +2: .byte 0xeb /* jmp <disp8> */ + .byte (clear_page_c_e - clear_page) - (3f - 2b) /* offset */ +3: .previous .section .altinstructions,"a" - .align 8 - .quad clear_page - .quad 1b - .word X86_FEATURE_REP_GOOD - .byte .Lclear_page_end - clear_page - .byte 2b - 1b + altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\ + .Lclear_page_end-clear_page, 2b-1b + altinstruction_entry clear_page,2b,X86_FEATURE_ERMS, \ + .Lclear_page_end-clear_page,3b-2b .previous diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 99e482615195..024840266ba0 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -15,23 +15,30 @@ #include <asm/asm-offsets.h> #include <asm/thread_info.h> #include <asm/cpufeature.h> +#include <asm/alternative-asm.h> - .macro ALTERNATIVE_JUMP feature,orig,alt +/* + * By placing feature2 after feature1 in altinstructions section, we logically + * implement: + * If CPU has feature2, jmp to alt2 is used + * else if CPU has feature1, jmp to alt1 is used + * else jmp to orig is used. 
+ */ + .macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2 0: .byte 0xe9 /* 32bit jump */ .long \orig-1f /* by default jump to orig */ 1: .section .altinstr_replacement,"ax" 2: .byte 0xe9 /* near jump with 32bit immediate */ - .long \alt-1b /* offset */ /* or alternatively to alt */ + .long \alt1-1b /* offset */ /* or alternatively to alt1 */ +3: .byte 0xe9 /* near jump with 32bit immediate */ + .long \alt2-1b /* offset */ /* or alternatively to alt2 */ .previous + .section .altinstructions,"a" - .align 8 - .quad 0b - .quad 2b - .word \feature /* when feature is set */ - .byte 5 - .byte 5 + altinstruction_entry 0b,2b,\feature1,5,5 + altinstruction_entry 0b,3b,\feature2,5,5 .previous .endm @@ -72,8 +79,10 @@ ENTRY(_copy_to_user) addq %rdx,%rcx jc bad_to_user cmpq TI_addr_limit(%rax),%rcx - jae bad_to_user - ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string + ja bad_to_user + ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ + copy_user_generic_unrolled,copy_user_generic_string, \ + copy_user_enhanced_fast_string CFI_ENDPROC ENDPROC(_copy_to_user) @@ -85,8 +94,10 @@ ENTRY(_copy_from_user) addq %rdx,%rcx jc bad_from_user cmpq TI_addr_limit(%rax),%rcx - jae bad_from_user - ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string + ja bad_from_user + ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ + copy_user_generic_unrolled,copy_user_generic_string, \ + copy_user_enhanced_fast_string CFI_ENDPROC ENDPROC(_copy_from_user) @@ -255,3 +266,37 @@ ENTRY(copy_user_generic_string) .previous CFI_ENDPROC ENDPROC(copy_user_generic_string) + +/* + * Some CPUs are adding enhanced REP MOVSB/STOSB instructions. + * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled. + * + * Input: + * rdi destination + * rsi source + * rdx count + * + * Output: + * eax uncopied bytes or 0 if successful. 
+ */ +ENTRY(copy_user_enhanced_fast_string) + CFI_STARTPROC + andl %edx,%edx + jz 2f + movl %edx,%ecx +1: rep + movsb +2: xorl %eax,%eax + ret + + .section .fixup,"ax" +12: movl %ecx,%edx /* ecx is zerorest also */ + jmp copy_user_handle_tail + .previous + + .section __ex_table,"a" + .align 8 + .quad 1b,12b + .previous + CFI_ENDPROC +ENDPROC(copy_user_enhanced_fast_string) diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 75ef61e35e38..daab21dae2d1 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -4,6 +4,7 @@ #include <asm/cpufeature.h> #include <asm/dwarf2.h> +#include <asm/alternative-asm.h> /* * memcpy - Copy a memory block. @@ -37,6 +38,23 @@ .Lmemcpy_e: .previous +/* + * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than + * memcpy_c. Use memcpy_c_e when possible. + * + * This gets patched over the unrolled variant (below) via the + * alternative instructions framework: + */ + .section .altinstr_replacement, "ax", @progbits +.Lmemcpy_c_e: + movq %rdi, %rax + + movl %edx, %ecx + rep movsb + ret +.Lmemcpy_e_e: + .previous + ENTRY(__memcpy) ENTRY(memcpy) CFI_STARTPROC @@ -171,21 +189,22 @@ ENDPROC(memcpy) ENDPROC(__memcpy) /* - * Some CPUs run faster using the string copy instructions. - * It is also a lot simpler. Use this when possible: - */ - - .section .altinstructions, "a" - .align 8 - .quad memcpy - .quad .Lmemcpy_c - .word X86_FEATURE_REP_GOOD - - /* + * Some CPUs are adding enhanced REP MOVSB/STOSB feature + * If the feature is supported, memcpy_c_e() is the first choice. + * If enhanced rep movsb copy is not available, use fast string copy + * memcpy_c() when possible. This is faster and code is simpler than + * original memcpy(). + * Otherwise, original memcpy() is used. + * In .altinstructions section, ERMS feature is placed after REP_GOOD + * feature to implement the right patch order. 
+ * * Replace only beginning, memcpy is used to apply alternatives, * so it is silly to overwrite itself with nops - reboot is the * only outcome... */ - .byte .Lmemcpy_e - .Lmemcpy_c - .byte .Lmemcpy_e - .Lmemcpy_c + .section .altinstructions, "a" + altinstruction_entry memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\ + .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c + altinstruction_entry memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \ + .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e .previous diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 0ecb8433e5a8..d0ec9c2936d7 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -8,6 +8,7 @@ #define _STRING_C #include <linux/linkage.h> #include <asm/dwarf2.h> +#include <asm/cpufeature.h> #undef memmove @@ -24,6 +25,7 @@ */ ENTRY(memmove) CFI_STARTPROC + /* Handle more 32bytes in loop */ mov %rdi, %rax cmp $0x20, %rdx @@ -31,8 +33,13 @@ ENTRY(memmove) /* Decide forward/backward copy mode */ cmp %rdi, %rsi - jb 2f + jge .Lmemmove_begin_forward + mov %rsi, %r8 + add %rdx, %r8 + cmp %rdi, %r8 + jg 2f +.Lmemmove_begin_forward: /* * movsq instruction have many startup latency * so we handle small size by general register. @@ -78,6 +85,8 @@ ENTRY(memmove) rep movsq movq %r11, (%r10) jmp 13f +.Lmemmove_end_forward: + /* * Handle data backward by movsq. */ @@ -194,4 +203,22 @@ ENTRY(memmove) 13: retq CFI_ENDPROC + + .section .altinstr_replacement,"ax" +.Lmemmove_begin_forward_efs: + /* Forward moving data. 
*/ + movq %rdx, %rcx + rep movsb + retq +.Lmemmove_end_forward_efs: + .previous + + .section .altinstructions,"a" + .align 8 + .quad .Lmemmove_begin_forward + .quad .Lmemmove_begin_forward_efs + .word X86_FEATURE_ERMS + .byte .Lmemmove_end_forward-.Lmemmove_begin_forward + .byte .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs + .previous ENDPROC(memmove) diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 09d344269652..79bd454b78a3 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -2,9 +2,13 @@ #include <linux/linkage.h> #include <asm/dwarf2.h> +#include <asm/cpufeature.h> +#include <asm/alternative-asm.h> /* - * ISO C memset - set a memory block to a byte value. + * ISO C memset - set a memory block to a byte value. This function uses fast + * string to get better performance than the original function. The code is + * simpler and shorter than the original function as well. * * rdi destination * rsi value (char) @@ -31,6 +35,28 @@ .Lmemset_e: .previous +/* + * ISO C memset - set a memory block to a byte value. This function uses + * enhanced rep stosb to override the fast string function. + * The code is simpler and shorter than the fast string function as well. + * + * rdi destination + * rsi value (char) + * rdx count (bytes) + * + * rax original destination + */ + .section .altinstr_replacement, "ax", @progbits +.Lmemset_c_e: + movq %rdi,%r9 + movb %sil,%al + movl %edx,%ecx + rep stosb + movq %r9,%rax + ret +.Lmemset_e_e: + .previous + ENTRY(memset) ENTRY(__memset) CFI_STARTPROC @@ -112,16 +138,20 @@ ENTRY(__memset) ENDPROC(memset) ENDPROC(__memset) - /* Some CPUs run faster using the string instructions. - It is also a lot simpler. Use this when possible */ - -#include <asm/cpufeature.h> - + /* Some CPUs support enhanced REP MOVSB/STOSB feature. + * It is recommended to use this when possible. + * + * If enhanced REP MOVSB/STOSB feature is not available, use fast string + * instructions. 
+ * + * Otherwise, use original memset function. + * + * In .altinstructions section, ERMS feature is placed after REP_GOOD + * feature to implement the right patch order. + */ .section .altinstructions,"a" - .align 8 - .quad memset - .quad .Lmemset_c - .word X86_FEATURE_REP_GOOD - .byte .Lfinal - memset - .byte .Lmemset_e - .Lmemset_c + altinstruction_entry memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\ + .Lfinal-memset,.Lmemset_e-.Lmemset_c + altinstruction_entry memset,.Lmemset_c_e,X86_FEATURE_ERMS, \ + .Lfinal-memset,.Lmemset_e_e-.Lmemset_c_e .previous diff --git a/include/linux/init.h b/include/linux/init.h index 577671c55153..9146f39cdddf 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -79,29 +79,29 @@ #define __exitused __used #endif -#define __exit __section(.exit.text) __exitused __cold +#define __exit __section(.exit.text) __exitused __cold notrace /* Used for HOTPLUG */ -#define __devinit __section(.devinit.text) __cold +#define __devinit __section(.devinit.text) __cold notrace #define __devinitdata __section(.devinit.data) #define __devinitconst __section(.devinit.rodata) -#define __devexit __section(.devexit.text) __exitused __cold +#define __devexit __section(.devexit.text) __exitused __cold notrace #define __devexitdata __section(.devexit.data) #define __devexitconst __section(.devexit.rodata) /* Used for HOTPLUG_CPU */ -#define __cpuinit __section(.cpuinit.text) __cold +#define __cpuinit __section(.cpuinit.text) __cold notrace #define __cpuinitdata __section(.cpuinit.data) #define __cpuinitconst __section(.cpuinit.rodata) -#define __cpuexit __section(.cpuexit.text) __exitused __cold +#define __cpuexit __section(.cpuexit.text) __exitused __cold notrace #define __cpuexitdata __section(.cpuexit.data) #define __cpuexitconst __section(.cpuexit.rodata) /* Used for MEMORY_HOTPLUG */ -#define __meminit __section(.meminit.text) __cold +#define __meminit __section(.meminit.text) __cold notrace #define __meminitdata __section(.meminit.data) #define 
__meminitconst __section(.meminit.rodata) -#define __memexit __section(.memexit.text) __exitused __cold +#define __memexit __section(.memexit.text) __exitused __cold notrace #define __memexitdata __section(.memexit.data) #define __memexitconst __section(.memexit.rodata) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index d5f925abe4d2..6165622c3e29 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -244,14 +244,19 @@ endif ifdef CONFIG_FTRACE_MCOUNT_RECORD ifdef BUILD_C_RECORDMCOUNT +ifeq ("$(origin RECORDMCOUNT_WARN)", "command line") + RECORDMCOUNT_FLAGS = -w +endif # Due to recursion, we must skip empty.o. # The empty.o file is created in the make process in order to determine # the target endianness and word size. It is made before all other C # files, including recordmcount. sub_cmd_record_mcount = \ if [ $(@) != "scripts/mod/empty.o" ]; then \ - $(objtree)/scripts/recordmcount "$(@)"; \ + $(objtree)/scripts/recordmcount $(RECORDMCOUNT_FLAGS) "$(@)"; \ fi; +recordmcount_source := $(srctree)/scripts/recordmcount.c \ + $(srctree)/scripts/recordmcount.h else sub_cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ "$(if $(CONFIG_CPU_BIG_ENDIAN),big,little)" \ @@ -259,6 +264,7 @@ sub_cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH "$(OBJDUMP)" "$(OBJCOPY)" "$(CC) $(KBUILD_CFLAGS)" \ "$(LD)" "$(NM)" "$(RM)" "$(MV)" \ "$(if $(part-of-module),1,0)" "$(@)"; +recordmcount_source := $(srctree)/scripts/recordmcount.pl endif cmd_record_mcount = \ if [ "$(findstring -pg,$(_c_flags))" = "-pg" ]; then \ @@ -279,13 +285,13 @@ define rule_cc_o_c endef # Built-in and composite module parts -$(obj)/%.o: $(src)/%.c FORCE +$(obj)/%.o: $(src)/%.c $(recordmcount_source) FORCE $(call cmd,force_checksrc) $(call if_changed_rule,cc_o_c) # Single-part modules are special since we need to mark them in $(MODVERDIR) -$(single-used-m): $(obj)/%.o: $(src)/%.c FORCE +$(single-used-m): $(obj)/%.o: $(src)/%.c 
$(recordmcount_source) FORCE $(call cmd,force_checksrc) $(call if_changed_rule,cc_o_c) @{ echo $(@:.o=.ko); echo $@; } > $(MODVERDIR)/$(@F:.o=.mod) diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index f9f6f52db772..ee52cb8e17ad 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -24,6 +24,7 @@ #include <sys/types.h> #include <sys/mman.h> #include <sys/stat.h> +#include <getopt.h> #include <elf.h> #include <fcntl.h> #include <setjmp.h> @@ -39,6 +40,7 @@ static char gpfx; /* prefix for global symbol name (sometimes '_') */ static struct stat sb; /* Remember .st_size, etc. */ static jmp_buf jmpenv; /* setjmp/longjmp per-file error escape */ static const char *altmcount; /* alternate mcount symbol name */ +static int warn_on_notrace_sect; /* warn when section has mcount not being recorded */ /* setjmp() return values */ enum { @@ -78,7 +80,7 @@ static off_t ulseek(int const fd, off_t const offset, int const whence) { off_t const w = lseek(fd, offset, whence); - if ((off_t)-1 == w) { + if (w == (off_t)-1) { perror("lseek"); fail_file(); } @@ -111,13 +113,41 @@ static void * umalloc(size_t size) { void *const addr = malloc(size); - if (0 == addr) { + if (addr == 0) { fprintf(stderr, "malloc failed: %zu bytes\n", size); fail_file(); } return addr; } +static unsigned char ideal_nop5_x86_64[5] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 }; +static unsigned char ideal_nop5_x86_32[5] = { 0x3e, 0x8d, 0x74, 0x26, 0x00 }; +static unsigned char *ideal_nop; + +static char rel_type_nop; + +static int (*make_nop)(void *map, size_t const offset); + +static int make_nop_x86(void *map, size_t const offset) +{ + uint32_t *ptr; + unsigned char *op; + + /* Confirm we have 0xe8 0x0 0x0 0x0 0x0 */ + ptr = map + offset; + if (*ptr != 0) + return -1; + + op = map + offset - 1; + if (*op != 0xe8) + return -1; + + /* convert to nop */ + ulseek(fd_map, offset - 1, SEEK_SET); + uwrite(fd_map, ideal_nop, 5); + return 0; +} + /* * Get the whole file as a programming convenience 
in order to avoid * malloc+lseek+read+free of many pieces. If successful, then mmap @@ -136,7 +166,7 @@ static void *mmap_file(char const *fname) void *addr; fd_map = open(fname, O_RDWR); - if (0 > fd_map || 0 > fstat(fd_map, &sb)) { + if (fd_map < 0 || fstat(fd_map, &sb) < 0) { perror(fname); fail_file(); } @@ -147,7 +177,7 @@ static void *mmap_file(char const *fname) addr = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd_map, 0); mmap_failed = 0; - if (MAP_FAILED == addr) { + if (addr == MAP_FAILED) { mmap_failed = 1; addr = umalloc(sb.st_size); uread(fd_map, addr, sb.st_size); @@ -206,12 +236,13 @@ static uint32_t (*w2)(uint16_t); static int is_mcounted_section_name(char const *const txtname) { - return 0 == strcmp(".text", txtname) || - 0 == strcmp(".ref.text", txtname) || - 0 == strcmp(".sched.text", txtname) || - 0 == strcmp(".spinlock.text", txtname) || - 0 == strcmp(".irqentry.text", txtname) || - 0 == strcmp(".text.unlikely", txtname); + return strcmp(".text", txtname) == 0 || + strcmp(".ref.text", txtname) == 0 || + strcmp(".sched.text", txtname) == 0 || + strcmp(".spinlock.text", txtname) == 0 || + strcmp(".irqentry.text", txtname) == 0 || + strcmp(".kprobes.text", txtname) == 0 || + strcmp(".text.unlikely", txtname) == 0; } /* 32 bit and 64 bit are very similar */ @@ -264,43 +295,48 @@ do_file(char const *const fname) w8 = w8nat; switch (ehdr->e_ident[EI_DATA]) { static unsigned int const endian = 1; - default: { + default: fprintf(stderr, "unrecognized ELF data encoding %d: %s\n", ehdr->e_ident[EI_DATA], fname); fail_file(); - } break; - case ELFDATA2LSB: { - if (1 != *(unsigned char const *)&endian) { + break; + case ELFDATA2LSB: + if (*(unsigned char const *)&endian != 1) { /* main() is big endian, file.o is little endian. 
*/ w = w4rev; w2 = w2rev; w8 = w8rev; } - } break; - case ELFDATA2MSB: { - if (0 != *(unsigned char const *)&endian) { + break; + case ELFDATA2MSB: + if (*(unsigned char const *)&endian != 0) { /* main() is little endian, file.o is big endian. */ w = w4rev; w2 = w2rev; w8 = w8rev; } - } break; + break; } /* end switch */ - if (0 != memcmp(ELFMAG, ehdr->e_ident, SELFMAG) - || ET_REL != w2(ehdr->e_type) - || EV_CURRENT != ehdr->e_ident[EI_VERSION]) { + if (memcmp(ELFMAG, ehdr->e_ident, SELFMAG) != 0 + || w2(ehdr->e_type) != ET_REL + || ehdr->e_ident[EI_VERSION] != EV_CURRENT) { fprintf(stderr, "unrecognized ET_REL file %s\n", fname); fail_file(); } gpfx = 0; switch (w2(ehdr->e_machine)) { - default: { + default: fprintf(stderr, "unrecognized e_machine %d %s\n", w2(ehdr->e_machine), fname); fail_file(); - } break; - case EM_386: reltype = R_386_32; break; + break; + case EM_386: + reltype = R_386_32; + make_nop = make_nop_x86; + ideal_nop = ideal_nop5_x86_32; + mcount_adjust_32 = -1; + break; case EM_ARM: reltype = R_ARM_ABS32; altmcount = "__gnu_mcount_nc"; break; @@ -311,67 +347,91 @@ do_file(char const *const fname) case EM_S390: /* reltype: e_class */ gpfx = '_'; break; case EM_SH: reltype = R_SH_DIR32; break; case EM_SPARCV9: reltype = R_SPARC_64; gpfx = '_'; break; - case EM_X86_64: reltype = R_X86_64_64; break; + case EM_X86_64: + make_nop = make_nop_x86; + ideal_nop = ideal_nop5_x86_64; + reltype = R_X86_64_64; + mcount_adjust_64 = -1; + break; } /* end switch */ switch (ehdr->e_ident[EI_CLASS]) { - default: { + default: fprintf(stderr, "unrecognized ELF class %d %s\n", ehdr->e_ident[EI_CLASS], fname); fail_file(); - } break; - case ELFCLASS32: { - if (sizeof(Elf32_Ehdr) != w2(ehdr->e_ehsize) - || sizeof(Elf32_Shdr) != w2(ehdr->e_shentsize)) { + break; + case ELFCLASS32: + if (w2(ehdr->e_ehsize) != sizeof(Elf32_Ehdr) + || w2(ehdr->e_shentsize) != sizeof(Elf32_Shdr)) { fprintf(stderr, "unrecognized ET_REL file: %s\n", fname); fail_file(); } - if (EM_S390 == 
w2(ehdr->e_machine)) + if (w2(ehdr->e_machine) == EM_S390) { reltype = R_390_32; - if (EM_MIPS == w2(ehdr->e_machine)) { + mcount_adjust_32 = -4; + } + if (w2(ehdr->e_machine) == EM_MIPS) { reltype = R_MIPS_32; is_fake_mcount32 = MIPS32_is_fake_mcount; } do32(ehdr, fname, reltype); - } break; + break; case ELFCLASS64: { Elf64_Ehdr *const ghdr = (Elf64_Ehdr *)ehdr; - if (sizeof(Elf64_Ehdr) != w2(ghdr->e_ehsize) - || sizeof(Elf64_Shdr) != w2(ghdr->e_shentsize)) { + if (w2(ghdr->e_ehsize) != sizeof(Elf64_Ehdr) + || w2(ghdr->e_shentsize) != sizeof(Elf64_Shdr)) { fprintf(stderr, "unrecognized ET_REL file: %s\n", fname); fail_file(); } - if (EM_S390 == w2(ghdr->e_machine)) + if (w2(ghdr->e_machine) == EM_S390) { reltype = R_390_64; - if (EM_MIPS == w2(ghdr->e_machine)) { + mcount_adjust_64 = -8; + } + if (w2(ghdr->e_machine) == EM_MIPS) { reltype = R_MIPS_64; Elf64_r_sym = MIPS64_r_sym; Elf64_r_info = MIPS64_r_info; is_fake_mcount64 = MIPS64_is_fake_mcount; } do64(ghdr, fname, reltype); - } break; + break; + } } /* end switch */ cleanup(); } int -main(int argc, char const *argv[]) +main(int argc, char *argv[]) { const char ftrace[] = "/ftrace.o"; int ftrace_size = sizeof(ftrace) - 1; int n_error = 0; /* gcc-4.3.0 false positive complaint */ + int c; + int i; + + while ((c = getopt(argc, argv, "w")) >= 0) { + switch (c) { + case 'w': + warn_on_notrace_sect = 1; + break; + default: + fprintf(stderr, "usage: recordmcount [-w] file.o...\n"); + return 0; + } + } - if (argc <= 1) { - fprintf(stderr, "usage: recordmcount file.o...\n"); + if ((argc - optind) < 1) { + fprintf(stderr, "usage: recordmcount [-w] file.o...\n"); return 0; } /* Process each file in turn, allowing deep failure. */ - for (--argc, ++argv; 0 < argc; --argc, ++argv) { + for (i = optind; i < argc; i++) { + char *file = argv[i]; int const sjval = setjmp(jmpenv); int len; @@ -380,29 +440,29 @@ main(int argc, char const *argv[]) * function but does not call it. 
Since ftrace.o should * not be traced anyway, we just skip it. */ - len = strlen(argv[0]); + len = strlen(file); if (len >= ftrace_size && - strcmp(argv[0] + (len - ftrace_size), ftrace) == 0) + strcmp(file + (len - ftrace_size), ftrace) == 0) continue; switch (sjval) { - default: { - fprintf(stderr, "internal error: %s\n", argv[0]); + default: + fprintf(stderr, "internal error: %s\n", file); exit(1); - } break; - case SJ_SETJMP: { /* normal sequence */ + break; + case SJ_SETJMP: /* normal sequence */ /* Avoid problems if early cleanup() */ fd_map = -1; ehdr_curr = NULL; mmap_failed = 1; - do_file(argv[0]); - } break; - case SJ_FAIL: { /* error in do_file or below */ + do_file(file); + break; + case SJ_FAIL: /* error in do_file or below */ ++n_error; - } break; - case SJ_SUCCEED: { /* premature success */ + break; + case SJ_SUCCEED: /* premature success */ /* do nothing */ - } break; + break; } /* end switch */ } return !!n_error; diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h index baf187bee983..4be60364a405 100644 --- a/scripts/recordmcount.h +++ b/scripts/recordmcount.h @@ -22,11 +22,15 @@ #undef is_fake_mcount #undef fn_is_fake_mcount #undef MIPS_is_fake_mcount +#undef mcount_adjust #undef sift_rel_mcount +#undef nop_mcount #undef find_secsym_ndx #undef __has_rel_mcount #undef has_rel_mcount #undef tot_relsize +#undef get_mcountsym +#undef get_sym_str_and_relp #undef do_func #undef Elf_Addr #undef Elf_Ehdr @@ -49,14 +53,18 @@ #ifdef RECORD_MCOUNT_64 # define append_func append64 # define sift_rel_mcount sift64_rel_mcount +# define nop_mcount nop_mcount_64 # define find_secsym_ndx find64_secsym_ndx # define __has_rel_mcount __has64_rel_mcount # define has_rel_mcount has64_rel_mcount # define tot_relsize tot64_relsize +# define get_sym_str_and_relp get_sym_str_and_relp_64 # define do_func do64 +# define get_mcountsym get_mcountsym_64 # define is_fake_mcount is_fake_mcount64 # define fn_is_fake_mcount fn_is_fake_mcount64 # define MIPS_is_fake_mcount 
MIPS64_is_fake_mcount +# define mcount_adjust mcount_adjust_64 # define Elf_Addr Elf64_Addr # define Elf_Ehdr Elf64_Ehdr # define Elf_Shdr Elf64_Shdr @@ -77,14 +85,18 @@ #else # define append_func append32 # define sift_rel_mcount sift32_rel_mcount +# define nop_mcount nop_mcount_32 # define find_secsym_ndx find32_secsym_ndx # define __has_rel_mcount __has32_rel_mcount # define has_rel_mcount has32_rel_mcount # define tot_relsize tot32_relsize +# define get_sym_str_and_relp get_sym_str_and_relp_32 # define do_func do32 +# define get_mcountsym get_mcountsym_32 # define is_fake_mcount is_fake_mcount32 # define fn_is_fake_mcount fn_is_fake_mcount32 # define MIPS_is_fake_mcount MIPS32_is_fake_mcount +# define mcount_adjust mcount_adjust_32 # define Elf_Addr Elf32_Addr # define Elf_Ehdr Elf32_Ehdr # define Elf_Shdr Elf32_Shdr @@ -123,6 +135,8 @@ static void fn_ELF_R_INFO(Elf_Rel *const rp, unsigned sym, unsigned type) } static void (*Elf_r_info)(Elf_Rel *const rp, unsigned sym, unsigned type) = fn_ELF_R_INFO; +static int mcount_adjust = 0; + /* * MIPS mcount long call has 2 _mcount symbols, only the position of the 1st * _mcount symbol is needed for dynamic function tracer, with it, to disable @@ -234,6 +248,49 @@ static void append_func(Elf_Ehdr *const ehdr, uwrite(fd_map, ehdr, sizeof(*ehdr)); } +static unsigned get_mcountsym(Elf_Sym const *const sym0, + Elf_Rel const *relp, + char const *const str0) +{ + unsigned mcountsym = 0; + + Elf_Sym const *const symp = + &sym0[Elf_r_sym(relp)]; + char const *symname = &str0[w(symp->st_name)]; + char const *mcount = gpfx == '_' ? 
"_mcount" : "mcount"; + + if (symname[0] == '.') + ++symname; /* ppc64 hack */ + if (strcmp(mcount, symname) == 0 || + (altmcount && strcmp(altmcount, symname) == 0)) + mcountsym = Elf_r_sym(relp); + + return mcountsym; +} + +static void get_sym_str_and_relp(Elf_Shdr const *const relhdr, + Elf_Ehdr const *const ehdr, + Elf_Sym const **sym0, + char const **str0, + Elf_Rel const **relp) +{ + Elf_Shdr *const shdr0 = (Elf_Shdr *)(_w(ehdr->e_shoff) + + (void *)ehdr); + unsigned const symsec_sh_link = w(relhdr->sh_link); + Elf_Shdr const *const symsec = &shdr0[symsec_sh_link]; + Elf_Shdr const *const strsec = &shdr0[w(symsec->sh_link)]; + Elf_Rel const *const rel0 = (Elf_Rel const *)(_w(relhdr->sh_offset) + + (void *)ehdr); + + *sym0 = (Elf_Sym const *)(_w(symsec->sh_offset) + + (void *)ehdr); + + *str0 = (char const *)(_w(strsec->sh_offset) + + (void *)ehdr); + + *relp = rel0; +} + /* * Look at the relocations in order to find the calls to mcount. * Accumulate the section offsets that are found, and their relocation info, @@ -250,47 +307,27 @@ static uint_t *sift_rel_mcount(uint_t *mlocp, { uint_t *const mloc0 = mlocp; Elf_Rel *mrelp = *mrelpp; - Elf_Shdr *const shdr0 = (Elf_Shdr *)(_w(ehdr->e_shoff) - + (void *)ehdr); - unsigned const symsec_sh_link = w(relhdr->sh_link); - Elf_Shdr const *const symsec = &shdr0[symsec_sh_link]; - Elf_Sym const *const sym0 = (Elf_Sym const *)(_w(symsec->sh_offset) - + (void *)ehdr); - - Elf_Shdr const *const strsec = &shdr0[w(symsec->sh_link)]; - char const *const str0 = (char const *)(_w(strsec->sh_offset) - + (void *)ehdr); - - Elf_Rel const *const rel0 = (Elf_Rel const *)(_w(relhdr->sh_offset) - + (void *)ehdr); + Elf_Sym const *sym0; + char const *str0; + Elf_Rel const *relp; unsigned rel_entsize = _w(relhdr->sh_entsize); unsigned const nrel = _w(relhdr->sh_size) / rel_entsize; - Elf_Rel const *relp = rel0; - unsigned mcountsym = 0; unsigned t; + get_sym_str_and_relp(relhdr, ehdr, &sym0, &str0, &relp); + for (t = nrel; t; --t) { - if 
(!mcountsym) { - Elf_Sym const *const symp = - &sym0[Elf_r_sym(relp)]; - char const *symname = &str0[w(symp->st_name)]; - char const *mcount = '_' == gpfx ? "_mcount" : "mcount"; - - if ('.' == symname[0]) - ++symname; /* ppc64 hack */ - if (0 == strcmp(mcount, symname) || - (altmcount && 0 == strcmp(altmcount, symname))) - mcountsym = Elf_r_sym(relp); - } + if (!mcountsym) + mcountsym = get_mcountsym(sym0, relp, str0); if (mcountsym == Elf_r_sym(relp) && !is_fake_mcount(relp)) { - uint_t const addend = _w(_w(relp->r_offset) - recval); - + uint_t const addend = + _w(_w(relp->r_offset) - recval + mcount_adjust); mrelp->r_offset = _w(offbase + ((void *)mlocp - (void *)mloc0)); Elf_r_info(mrelp, recsym, reltype); - if (sizeof(Elf_Rela) == rel_entsize) { + if (rel_entsize == sizeof(Elf_Rela)) { ((Elf_Rela *)mrelp)->r_addend = addend; *mlocp++ = 0; } else @@ -304,6 +341,63 @@ static uint_t *sift_rel_mcount(uint_t *mlocp, return mlocp; } +/* + * Read the relocation table again, but this time its called on sections + * that are not going to be traced. The mcount calls here will be converted + * into nops. 
+ */ +static void nop_mcount(Elf_Shdr const *const relhdr, + Elf_Ehdr const *const ehdr, + const char *const txtname) +{ + Elf_Shdr *const shdr0 = (Elf_Shdr *)(_w(ehdr->e_shoff) + + (void *)ehdr); + Elf_Sym const *sym0; + char const *str0; + Elf_Rel const *relp; + Elf_Shdr const *const shdr = &shdr0[w(relhdr->sh_info)]; + unsigned rel_entsize = _w(relhdr->sh_entsize); + unsigned const nrel = _w(relhdr->sh_size) / rel_entsize; + unsigned mcountsym = 0; + unsigned t; + int once = 0; + + get_sym_str_and_relp(relhdr, ehdr, &sym0, &str0, &relp); + + for (t = nrel; t; --t) { + int ret = -1; + + if (!mcountsym) + mcountsym = get_mcountsym(sym0, relp, str0); + + if (mcountsym == Elf_r_sym(relp) && !is_fake_mcount(relp)) { + if (make_nop) + ret = make_nop((void *)ehdr, shdr->sh_offset + relp->r_offset); + if (warn_on_notrace_sect && !once) { + printf("Section %s has mcount callers being ignored\n", + txtname); + once = 1; + /* just warn? */ + if (!make_nop) + return; + } + } + + /* + * If we successfully removed the mcount, mark the relocation + * as a nop (don't do anything with it). 
+ */ + if (!ret) { + Elf_Rel rel; + rel = *(Elf_Rel *)relp; + Elf_r_info(&rel, Elf_r_sym(relp), rel_type_nop); + ulseek(fd_map, (void *)relp - (void *)ehdr, SEEK_SET); + uwrite(fd_map, &rel, sizeof(rel)); + } + relp = (Elf_Rel const *)(rel_entsize + (void *)relp); + } +} + /* * Find a symbol in the given section, to be used as the base for relocating @@ -354,13 +448,13 @@ __has_rel_mcount(Elf_Shdr const *const relhdr, /* is SHT_REL or SHT_RELA */ Elf_Shdr const *const txthdr = &shdr0[w(relhdr->sh_info)]; char const *const txtname = &shstrtab[w(txthdr->sh_name)]; - if (0 == strcmp("__mcount_loc", txtname)) { + if (strcmp("__mcount_loc", txtname) == 0) { fprintf(stderr, "warning: __mcount_loc already exists: %s\n", fname); succeed_file(); } - if (SHT_PROGBITS != w(txthdr->sh_type) || - !is_mcounted_section_name(txtname)) + if (w(txthdr->sh_type) != SHT_PROGBITS || + !(w(txthdr->sh_flags) & SHF_EXECINSTR)) return NULL; return txtname; } @@ -370,7 +464,7 @@ static char const *has_rel_mcount(Elf_Shdr const *const relhdr, char const *const shstrtab, char const *const fname) { - if (SHT_REL != w(relhdr->sh_type) && SHT_RELA != w(relhdr->sh_type)) + if (w(relhdr->sh_type) != SHT_REL && w(relhdr->sh_type) != SHT_RELA) return NULL; return __has_rel_mcount(relhdr, shdr0, shstrtab, fname); } @@ -383,9 +477,11 @@ static unsigned tot_relsize(Elf_Shdr const *const shdr0, { unsigned totrelsz = 0; Elf_Shdr const *shdrp = shdr0; + char const *txtname; for (; nhdr; --nhdr, ++shdrp) { - if (has_rel_mcount(shdrp, shdr0, shstrtab, fname)) + txtname = has_rel_mcount(shdrp, shdr0, shstrtab, fname); + if (txtname && is_mcounted_section_name(txtname)) totrelsz += _w(shdrp->sh_size); } return totrelsz; @@ -421,7 +517,7 @@ do_func(Elf_Ehdr *const ehdr, char const *const fname, unsigned const reltype) for (relhdr = shdr0, k = nhdr; k; --k, ++relhdr) { char const *const txtname = has_rel_mcount(relhdr, shdr0, shstrtab, fname); - if (txtname) { + if (txtname && is_mcounted_section_name(txtname)) 
{ uint_t recval = 0; unsigned const recsym = find_secsym_ndx( w(relhdr->sh_info), txtname, &recval, @@ -432,6 +528,12 @@ do_func(Elf_Ehdr *const ehdr, char const *const fname, unsigned const reltype) mlocp = sift_rel_mcount(mlocp, (void *)mlocp - (void *)mloc0, &mrelp, relhdr, ehdr, recsym, recval, reltype); + } else if (txtname && (warn_on_notrace_sect || make_nop)) { + /* + * This section is ignored by ftrace, but still + * has mcount calls. Convert them to nops now. + */ + nop_mcount(relhdr, ehdr, txtname); } } if (mloc0 != mlocp) { diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 4be0deea71ca..858966ab019c 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -134,6 +134,7 @@ my %text_sections = ( ".sched.text" => 1, ".spinlock.text" => 1, ".irqentry.text" => 1, + ".kprobes.text" => 1, ".text.unlikely" => 1, ); @@ -222,6 +223,7 @@ if ($arch eq "x86_64") { $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount([+-]0x[0-9a-zA-Z]+)?\$"; $type = ".quad"; $alignment = 8; + $mcount_adjust = -1; # force flags for this arch $ld .= " -m elf_x86_64"; @@ -231,6 +233,7 @@ if ($arch eq "x86_64") { } elsif ($arch eq "i386") { $alignment = 4; + $mcount_adjust = -1; # force flags for this arch $ld .= " -m elf_i386"; @@ -240,12 +243,14 @@ if ($arch eq "x86_64") { } elsif ($arch eq "s390" && $bits == 32) { $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_390_32\\s+_mcount\$"; + $mcount_adjust = -4; $alignment = 4; $ld .= " -m elf_s390"; $cc .= " -m31"; } elsif ($arch eq "s390" && $bits == 64) { $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_390_(PC|PLT)32DBL\\s+_mcount\\+0x2\$"; + $mcount_adjust = -8; $alignment = 8; $type = ".quad"; $ld .= " -m elf64_s390"; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 602c3c96fa1e..a9f06715e44d 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -6,24 +6,28 @@ * * Sample output: - $ perf stat ~/hackbench 10 - Time: 0.104 + $ perf stat ./hackbench 10 - Performance counter 
stats for '/home/mingo/hackbench': + Time: 0.118 - 1255.538611 task clock ticks # 10.143 CPU utilization factor - 54011 context switches # 0.043 M/sec - 385 CPU migrations # 0.000 M/sec - 17755 pagefaults # 0.014 M/sec - 3808323185 CPU cycles # 3033.219 M/sec - 1575111190 instructions # 1254.530 M/sec - 17367895 cache references # 13.833 M/sec - 7674421 cache misses # 6.112 M/sec + Performance counter stats for './hackbench 10': - Wall-clock time elapsed: 123.786620 msecs + 1708.761321 task-clock # 11.037 CPUs utilized + 41,190 context-switches # 0.024 M/sec + 6,735 CPU-migrations # 0.004 M/sec + 17,318 page-faults # 0.010 M/sec + 5,205,202,243 cycles # 3.046 GHz + 3,856,436,920 stalled-cycles-frontend # 74.09% frontend cycles idle + 1,600,790,871 stalled-cycles-backend # 30.75% backend cycles idle + 2,603,501,247 instructions # 0.50 insns per cycle + # 1.48 stalled cycles per insn + 484,357,498 branches # 283.455 M/sec + 6,388,934 branch-misses # 1.32% of all branches + + 0.154822978 seconds time elapsed * - * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com> + * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com> * * Improvements and fixes by: * @@ -75,22 +79,10 @@ static struct perf_event_attr default_attrs[] = { }; /* - * Detailed stats: + * Detailed stats (-d), covering the L1 and last level data caches: */ static struct perf_event_attr detailed_attrs[] = { - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, - - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, - { .type = PERF_TYPE_HARDWARE, .config = 
PERF_COUNT_HW_INSTRUCTIONS }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, - { .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_L1D << 0 | @@ -116,6 +108,69 @@ static struct perf_event_attr detailed_attrs[] = { (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, }; +/* + * Very detailed stats (-d -d), covering the instruction cache and the TLB caches: + */ +static struct perf_event_attr very_detailed_attrs[] = { + + { .type = PERF_TYPE_HW_CACHE, + .config = + PERF_COUNT_HW_CACHE_L1I << 0 | + (PERF_COUNT_HW_CACHE_OP_READ << 8) | + (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, + + { .type = PERF_TYPE_HW_CACHE, + .config = + PERF_COUNT_HW_CACHE_L1I << 0 | + (PERF_COUNT_HW_CACHE_OP_READ << 8) | + (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, + + { .type = PERF_TYPE_HW_CACHE, + .config = + PERF_COUNT_HW_CACHE_DTLB << 0 | + (PERF_COUNT_HW_CACHE_OP_READ << 8) | + (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, + + { .type = PERF_TYPE_HW_CACHE, + .config = + PERF_COUNT_HW_CACHE_DTLB << 0 | + (PERF_COUNT_HW_CACHE_OP_READ << 8) | + (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, + + { .type = PERF_TYPE_HW_CACHE, + .config = + PERF_COUNT_HW_CACHE_ITLB << 0 | + (PERF_COUNT_HW_CACHE_OP_READ << 8) | + (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, + + { .type = PERF_TYPE_HW_CACHE, + .config = + PERF_COUNT_HW_CACHE_ITLB << 0 | + (PERF_COUNT_HW_CACHE_OP_READ << 8) | + (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, + +}; + +/* + * Very, very detailed stats (-d -d -d), adding prefetch events: + */ +static struct perf_event_attr very_very_detailed_attrs[] = { + + { .type = PERF_TYPE_HW_CACHE, + .config = + PERF_COUNT_HW_CACHE_L1D << 0 | + (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | + (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, + + { .type = PERF_TYPE_HW_CACHE, + .config = + PERF_COUNT_HW_CACHE_L1D << 0 | + (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | + (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, +}; + + 
+ struct perf_evlist *evsel_list; static bool system_wide = false; @@ -129,7 +184,7 @@ static pid_t target_pid = -1; static pid_t target_tid = -1; static pid_t child_pid = -1; static bool null_run = false; -static bool detailed_run = false; +static int detailed_run = 0; static bool sync_run = false; static bool big_num = true; static int big_num_opt = -1; @@ -206,6 +261,10 @@ struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS]; struct stats runtime_branches_stats[MAX_NR_CPUS]; struct stats runtime_cacherefs_stats[MAX_NR_CPUS]; struct stats runtime_l1_dcache_stats[MAX_NR_CPUS]; +struct stats runtime_l1_icache_stats[MAX_NR_CPUS]; +struct stats runtime_ll_cache_stats[MAX_NR_CPUS]; +struct stats runtime_itlb_cache_stats[MAX_NR_CPUS]; +struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS]; struct stats walltime_nsecs_stats; static int create_perf_stat_counter(struct perf_evsel *evsel) @@ -262,6 +321,14 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count) update_stats(&runtime_cacherefs_stats[0], count[0]); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) update_stats(&runtime_l1_dcache_stats[0], count[0]); + else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) + update_stats(&runtime_l1_icache_stats[0], count[0]); + else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) + update_stats(&runtime_ll_cache_stats[0], count[0]); + else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) + update_stats(&runtime_dtlb_cache_stats[0], count[0]); + else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) + update_stats(&runtime_itlb_cache_stats[0], count[0]); } /* @@ -464,7 +531,7 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg) { double msecs = avg / 1e6; char cpustr[16] = { '\0', }; - const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-24s"; + const char *fmt = csv_output ? 
"%s%.6f%s%s" : "%s%18.6f%s%-25s"; if (no_aggr) sprintf(cpustr, "CPU%*d%s", @@ -575,6 +642,98 @@ static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, dou fprintf(stderr, " of all L1-dcache hits "); } +static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel __used, double avg) +{ + double total, ratio = 0.0; + const char *color; + + total = avg_stats(&runtime_l1_icache_stats[cpu]); + + if (total) + ratio = avg / total * 100.0; + + color = PERF_COLOR_NORMAL; + if (ratio > 20.0) + color = PERF_COLOR_RED; + else if (ratio > 10.0) + color = PERF_COLOR_MAGENTA; + else if (ratio > 5.0) + color = PERF_COLOR_YELLOW; + + fprintf(stderr, " # "); + color_fprintf(stderr, color, "%6.2f%%", ratio); + fprintf(stderr, " of all L1-icache hits "); +} + +static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) +{ + double total, ratio = 0.0; + const char *color; + + total = avg_stats(&runtime_dtlb_cache_stats[cpu]); + + if (total) + ratio = avg / total * 100.0; + + color = PERF_COLOR_NORMAL; + if (ratio > 20.0) + color = PERF_COLOR_RED; + else if (ratio > 10.0) + color = PERF_COLOR_MAGENTA; + else if (ratio > 5.0) + color = PERF_COLOR_YELLOW; + + fprintf(stderr, " # "); + color_fprintf(stderr, color, "%6.2f%%", ratio); + fprintf(stderr, " of all dTLB cache hits "); +} + +static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) +{ + double total, ratio = 0.0; + const char *color; + + total = avg_stats(&runtime_itlb_cache_stats[cpu]); + + if (total) + ratio = avg / total * 100.0; + + color = PERF_COLOR_NORMAL; + if (ratio > 20.0) + color = PERF_COLOR_RED; + else if (ratio > 10.0) + color = PERF_COLOR_MAGENTA; + else if (ratio > 5.0) + color = PERF_COLOR_YELLOW; + + fprintf(stderr, " # "); + color_fprintf(stderr, color, "%6.2f%%", ratio); + fprintf(stderr, " of all iTLB cache hits "); +} + +static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) +{ + 
double total, ratio = 0.0; + const char *color; + + total = avg_stats(&runtime_ll_cache_stats[cpu]); + + if (total) + ratio = avg / total * 100.0; + + color = PERF_COLOR_NORMAL; + if (ratio > 20.0) + color = PERF_COLOR_RED; + else if (ratio > 10.0) + color = PERF_COLOR_MAGENTA; + else if (ratio > 5.0) + color = PERF_COLOR_YELLOW; + + fprintf(stderr, " # "); + color_fprintf(stderr, color, "%6.2f%%", ratio); + fprintf(stderr, " of all LL-cache hits "); +} + static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) { double total, ratio = 0.0; @@ -584,9 +743,9 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) if (csv_output) fmt = "%s%.0f%s%s"; else if (big_num) - fmt = "%s%'18.0f%s%-24s"; + fmt = "%s%'18.0f%s%-25s"; else - fmt = "%s%18.0f%s%-24s"; + fmt = "%s%18.0f%s%-25s"; if (no_aggr) sprintf(cpustr, "CPU%*d%s", @@ -616,7 +775,7 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) if (total && avg) { ratio = total / avg; - fprintf(stderr, "\n # %5.2f stalled cycles per insn", ratio); + fprintf(stderr, "\n # %5.2f stalled cycles per insn", ratio); } } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && @@ -629,6 +788,34 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && runtime_l1_dcache_stats[cpu].n != 0) { print_l1_dcache_misses(cpu, evsel, avg); + } else if ( + evsel->attr.type == PERF_TYPE_HW_CACHE && + evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | + ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && + runtime_l1_icache_stats[cpu].n != 0) { + print_l1_icache_misses(cpu, evsel, avg); + } else if ( + evsel->attr.type == PERF_TYPE_HW_CACHE && + evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | + ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && + runtime_dtlb_cache_stats[cpu].n != 0) { + print_dtlb_cache_misses(cpu, evsel, avg); + } else if ( + 
evsel->attr.type == PERF_TYPE_HW_CACHE && + evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | + ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && + runtime_itlb_cache_stats[cpu].n != 0) { + print_itlb_cache_misses(cpu, evsel, avg); + } else if ( + evsel->attr.type == PERF_TYPE_HW_CACHE && + evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | + ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && + runtime_ll_cache_stats[cpu].n != 0) { + print_ll_cache_misses(cpu, evsel, avg); } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && runtime_cacherefs_stats[cpu].n != 0) { total = avg_stats(&runtime_cacherefs_stats[cpu]); @@ -704,7 +891,7 @@ static void print_counter_aggr(struct perf_evsel *counter) avg_enabled = avg_stats(&ps->res_stats[1]); avg_running = avg_stats(&ps->res_stats[2]); - fprintf(stderr, " (%.2f%%)", 100 * avg_running / avg_enabled); + fprintf(stderr, " [%5.2f%%]", 100 * avg_running / avg_enabled); } fprintf(stderr, "\n"); } @@ -787,10 +974,12 @@ static void print_stat(int argc, const char **argv) } if (!csv_output) { - fprintf(stderr, "\n"); - fprintf(stderr, " %18.9f seconds time elapsed", + if (!null_run) + fprintf(stderr, "\n"); + fprintf(stderr, " %17.9f seconds time elapsed", avg_stats(&walltime_nsecs_stats)/1e9); if (run_count > 1) { + fprintf(stderr, " "); print_noise_pct(stddev_stats(&walltime_nsecs_stats), avg_stats(&walltime_nsecs_stats)); } @@ -854,7 +1043,7 @@ static const struct option options[] = { "repeat command and print average + stddev (max: 100)"), OPT_BOOLEAN('n', "null", &null_run, "null run - dont start any counters"), - OPT_BOOLEAN('d', "detailed", &detailed_run, + OPT_INCR('d', "detailed", &detailed_run, "detailed run - start a lot of events"), OPT_BOOLEAN('S', "sync", &sync_run, "call sync() before starting a run"), @@ -873,6 +1062,70 @@ static const struct option options[] = { OPT_END() }; +/* + * Add default attributes, if there were no attributes 
specified or + * if -d/--detailed, -d -d or -d -d -d is used: + */ +static int add_default_attributes(void) +{ + struct perf_evsel *pos; + size_t attr_nr = 0; + size_t c; + + /* Set attrs if no event is selected and !null_run: */ + if (null_run) + return 0; + + if (!evsel_list->nr_entries) { + for (c = 0; c < ARRAY_SIZE(default_attrs); c++) { + pos = perf_evsel__new(default_attrs + c, c + attr_nr); + if (pos == NULL) + return -1; + perf_evlist__add(evsel_list, pos); + } + attr_nr += c; + } + + /* Detailed events get appended to the event list: */ + + if (detailed_run < 1) + return 0; + + /* Append detailed run extra attributes: */ + for (c = 0; c < ARRAY_SIZE(detailed_attrs); c++) { + pos = perf_evsel__new(detailed_attrs + c, c + attr_nr); + if (pos == NULL) + return -1; + perf_evlist__add(evsel_list, pos); + } + attr_nr += c; + + if (detailed_run < 2) + return 0; + + /* Append very detailed run extra attributes: */ + for (c = 0; c < ARRAY_SIZE(very_detailed_attrs); c++) { + pos = perf_evsel__new(very_detailed_attrs + c, c + attr_nr); + if (pos == NULL) + return -1; + perf_evlist__add(evsel_list, pos); + } + + if (detailed_run < 3) + return 0; + + /* Append very, very detailed run extra attributes: */ + for (c = 0; c < ARRAY_SIZE(very_very_detailed_attrs); c++) { + pos = perf_evsel__new(very_very_detailed_attrs + c, c + attr_nr); + if (pos == NULL) + return -1; + perf_evlist__add(evsel_list, pos); + } + + + return 0; +} + int cmd_stat(int argc, const char **argv, const char *prefix __used) { struct perf_evsel *pos; @@ -918,28 +1171,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) usage_with_options(stat_usage, options); } - /* Set attrs and nr_counters if no event is selected and !null_run */ - if (detailed_run) { - size_t c; - - for (c = 0; c < ARRAY_SIZE(detailed_attrs); ++c) { - pos = perf_evsel__new(&detailed_attrs[c], c); - if (pos == NULL) - goto out; - perf_evlist__add(evsel_list, pos); - } - } - /* Set attrs and nr_counters if no 
event is selected and !null_run */ - if (!detailed_run && !null_run && !evsel_list->nr_entries) { - size_t c; - - for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) { - pos = perf_evsel__new(&default_attrs[c], c); - if (pos == NULL) - goto out; - perf_evlist__add(evsel_list, pos); - } - } + if (add_default_attributes()) + goto out; if (target_pid != -1) target_tid = target_pid; diff --git a/tools/perf/util/include/asm/alternative-asm.h b/tools/perf/util/include/asm/alternative-asm.h new file mode 100644 index 000000000000..6789d788d494 --- /dev/null +++ b/tools/perf/util/include/asm/alternative-asm.h @@ -0,0 +1,8 @@ +#ifndef _PERF_ASM_ALTERNATIVE_ASM_H +#define _PERF_ASM_ALTERNATIVE_ASM_H + +/* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */ + +#define altinstruction_entry # + +#endif |