author		Ingo Molnar <mingo@kernel.org>	2019-12-25 10:43:08 +0100
committer	Ingo Molnar <mingo@kernel.org>	2019-12-25 10:43:08 +0100
commit		46f5cfc13d54962097cb0cc5f97593e2a98a6aed (patch)
tree		1ec5da58e3d085a7e358a4b7adf3b6e6b7ce1753
parent		46cf053efec6a3a5f343fead837777efe8252a46 (diff)
parent		2040cf9f59037aa8aec749363e69ead165b67b43 (diff)
Merge branch 'core/kprobes' into perf/core, to pick up a completed branch
Signed-off-by: Ingo Molnar <mingo@kernel.org>
38 files changed, 604 insertions(+), 1131 deletions(-)
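The bulk of this merge is the x86 conversion of ftrace, jump labels and optimized kprobes to one shared INT3-breakpoint patching facility (text_poke_bp() and friends), which in turn lets the arm/nds32/x86 set_kernel_text_rw()/set_all_modules_text_rw() hacks and the private ftrace_int3_handler() go away. The protocol itself is documented in the text_poke_bp_batch() kernel-doc further down; the following is a minimal user-space model of those three steps (plain C, illustrative only: it shows the write ordering, not the real IPI and emulation work done by text_poke_sync() and poke_int3_handler()):

/*
 * Minimal model of the three-step INT3 patching protocol; plain C,
 * not kernel code.  sync_cores() stands in for text_poke_sync().
 */
#include <stdio.h>
#include <string.h>

#define INT3 0xCC

static unsigned char site[5] = { 0x0F, 0x1F, 0x44, 0x00, 0x00 }; /* 5-byte NOP */

static void sync_cores(void) { /* on_each_cpu(do_sync_core, ...) in the kernel */ }

static void poke_bp(unsigned char *addr, const unsigned char *insn, int len)
{
	addr[0] = INT3;				/* 1) arm the breakpoint byte */
	sync_cores();				/*    every CPU now traps here */
	memcpy(addr + 1, insn + 1, len - 1);	/* 2) write the tail bytes */
	sync_cores();
	addr[0] = insn[0];			/* 3) drop in the real first byte */
	sync_cores();
}

int main(void)
{
	const unsigned char call[5] = { 0xE8, 0x10, 0x20, 0x30, 0x40 };

	poke_bp(site, call, 5);
	for (int i = 0; i < 5; i++)
		printf("%02X ", site[i]);
	printf("\n");
	return 0;
}

While the INT3 byte is in place, any CPU that executes the site traps into poke_int3_handler(), which emulates either the old or the new instruction, so no CPU can ever execute a half-written instruction.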
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 8b679e2ca3c3..89e5d864e923 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -53,8 +53,8 @@ obj-$(CONFIG_HAVE_ARM_SCU)	+= smp_scu.o
 obj-$(CONFIG_HAVE_ARM_TWD)	+= smp_twd.o
 obj-$(CONFIG_ARM_ARCH_TIMER)	+= arch_timer.o
 obj-$(CONFIG_FUNCTION_TRACER)	+= entry-ftrace.o
-obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o insn.o
-obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o insn.o
+obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o insn.o patch.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o insn.o patch.o
 obj-$(CONFIG_JUMP_LABEL)	+= jump_label.o insn.o patch.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
 # Main staffs in KPROBES are in arch/arm/probes/ .
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index bda949fd84e8..2a5ff69c28e6 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -22,6 +22,7 @@
 #include <asm/ftrace.h>
 #include <asm/insn.h>
 #include <asm/set_memory.h>
+#include <asm/patch.h>
 
 #ifdef CONFIG_THUMB2_KERNEL
 #define	NOP		0xf85deb04	/* pop.w {lr} */
@@ -35,9 +36,7 @@ static int __ftrace_modify_code(void *data)
 {
 	int *command = data;
 
-	set_kernel_text_rw();
 	ftrace_modify_all_code(*command);
-	set_kernel_text_ro();
 
 	return 0;
 }
@@ -59,13 +58,11 @@ static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr)
 
 int ftrace_arch_code_modify_prepare(void)
 {
-	set_all_modules_text_rw();
 	return 0;
 }
 
 int ftrace_arch_code_modify_post_process(void)
 {
-	set_all_modules_text_ro();
 	/* Make sure any TLB misses during machine stop are cleared. */
 	flush_tlb_all();
 	return 0;
@@ -97,10 +94,7 @@ static int ftrace_modify_code(unsigned long pc, unsigned long old,
 			return -EINVAL;
 	}
 
-	if (probe_kernel_write((void *)pc, &new, MCOUNT_INSN_SIZE))
-		return -EPERM;
-
-	flush_icache_range(pc, pc + MCOUNT_INSN_SIZE);
+	__patch_text((void *)pc, new);
 
 	return 0;
 }
diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c
index fd2a54b8cd57..22ab77ea27ad 100644
--- a/arch/nds32/kernel/ftrace.c
+++ b/arch/nds32/kernel/ftrace.c
@@ -89,18 +89,6 @@ int __init ftrace_dyn_arch_init(void)
 	return 0;
 }
 
-int ftrace_arch_code_modify_prepare(void)
-{
-	set_all_modules_text_rw();
-	return 0;
-}
-
-int ftrace_arch_code_modify_post_process(void)
-{
-	set_all_modules_text_ro();
-	return 0;
-}
-
 static unsigned long gen_sethi_insn(unsigned long addr)
 {
 	unsigned long opcode = 0x46000000;
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index c2a7458f912c..85be2f506272 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -47,8 +47,6 @@ struct dyn_arch_ftrace {
 	/* No extra data needed for x86 */
 };
 
-int ftrace_int3_handler(struct pt_regs *regs);
-
 #define FTRACE_GRAPH_TRAMP_ADDR FTRACE_GRAPH_ADDR
 
 #endif /* CONFIG_DYNAMIC_FTRACE */
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 5dc909d9ad81..95b1f053bd96 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -11,12 +11,11 @@
 
 #include <asm-generic/kprobes.h>
 
-#define BREAKPOINT_INSTRUCTION	0xcc
-
 #ifdef CONFIG_KPROBES
 #include <linux/types.h>
 #include <linux/ptrace.h>
 #include <linux/percpu.h>
+#include <asm/text-patching.h>
 #include <asm/insn.h>
 
 #define  __ARCH_WANT_KPROBES_INSN_SLOT
@@ -25,10 +24,7 @@ struct pt_regs;
 struct kprobe;
 
 typedef u8 kprobe_opcode_t;
-#define RELATIVEJUMP_OPCODE 0xe9
-#define RELATIVEJUMP_SIZE 5
-#define RELATIVECALL_OPCODE 0xe8
-#define RELATIVE_ADDR_SIZE 4
+
 #define MAX_STACK_SIZE 64
 #define CUR_STACK_SIZE(ADDR) \
	(current_top_of_stack() - (unsigned long)(ADDR))
@@ -43,11 +39,11 @@ extern __visible kprobe_opcode_t optprobe_template_entry[];
 extern __visible kprobe_opcode_t optprobe_template_val[];
 extern __visible kprobe_opcode_t optprobe_template_call[];
 extern __visible kprobe_opcode_t optprobe_template_end[];
-#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE)
+#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + DISP32_SIZE)
 #define MAX_OPTINSN_SIZE				\
 	(((unsigned long)optprobe_template_end -	\
 	  (unsigned long)optprobe_template_entry) +	\
-	 MAX_OPTIMIZED_LENGTH + RELATIVEJUMP_SIZE)
+	 MAX_OPTIMIZED_LENGTH + JMP32_INSN_SIZE)
 
 extern const int kretprobe_blacklist_size;
 
@@ -73,7 +69,7 @@ struct arch_specific_insn {
 
 struct arch_optimized_insn {
 	/* copy of the original instructions */
-	kprobe_opcode_t copied_insn[RELATIVE_ADDR_SIZE];
+	kprobe_opcode_t copied_insn[DISP32_SIZE];
 	/* detour code buffer */
 	kprobe_opcode_t *insn;
 	/* the size of instructions copied to detour code buffer */
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index 2ee8e469dcf5..64c3dce374e5 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -81,8 +81,6 @@ int set_direct_map_invalid_noflush(struct page *page);
 int set_direct_map_default_noflush(struct page *page);
 
 extern int kernel_set_to_readonly;
-void set_kernel_text_rw(void);
-void set_kernel_text_ro(void);
 
 #ifdef CONFIG_X86_64
 static inline int set_mce_nospec(unsigned long pfn)
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index 23c626a742e8..67315fa3956a 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -25,14 +25,6 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
  */
 #define POKE_MAX_OPCODE_SIZE	5
 
-struct text_poke_loc {
-	void *addr;
-	int len;
-	s32 rel32;
-	u8 opcode;
-	const u8 text[POKE_MAX_OPCODE_SIZE];
-};
-
 extern void text_poke_early(void *addr, const void *opcode, size_t len);
 
 /*
@@ -50,21 +42,13 @@ extern void text_poke_early(void *addr, const void *opcode, size_t len);
 * an inconsistent instruction while you patch.
 */
 extern void *text_poke(void *addr, const void *opcode, size_t len);
+extern void text_poke_sync(void);
 extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
 extern int poke_int3_handler(struct pt_regs *regs);
 extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate);
-extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries);
-extern void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
-			       const void *opcode, size_t len, const void *emulate);
-extern int after_bootmem;
-extern __ro_after_init struct mm_struct *poking_mm;
-extern __ro_after_init unsigned long poking_addr;
-
-#ifndef CONFIG_UML_X86
-static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
-{
-	regs->ip = ip;
-}
+extern void text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate);
+extern void text_poke_finish(void);
 
 #define INT3_INSN_SIZE		1
 #define INT3_INSN_OPCODE	0xCC
@@ -78,6 +62,67 @@ static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
 #define JMP8_INSN_SIZE		2
 #define JMP8_INSN_OPCODE	0xEB
 
+#define DISP32_SIZE		4
+
+static inline int text_opcode_size(u8 opcode)
+{
+	int size = 0;
+
+#define __CASE(insn)	\
+	case insn##_INSN_OPCODE: size = insn##_INSN_SIZE; break
+
+	switch(opcode) {
+	__CASE(INT3);
+	__CASE(CALL);
+	__CASE(JMP32);
+	__CASE(JMP8);
+	}
+
+#undef __CASE
+
+	return size;
+}
+
+union text_poke_insn {
+	u8 text[POKE_MAX_OPCODE_SIZE];
+	struct {
+		u8 opcode;
+		s32 disp;
+	} __attribute__((packed));
+};
+
+static __always_inline
+void *text_gen_insn(u8 opcode, const void *addr, const void *dest)
+{
+	static union text_poke_insn insn; /* per instance */
+	int size = text_opcode_size(opcode);
+
+	insn.opcode = opcode;
+
+	if (size > 1) {
+		insn.disp = (long)dest - (long)(addr + size);
+		if (size == 2) {
+			/*
+			 * Ensure that for JMP8 the displacement
+			 * actually fits the signed byte.
+			 */
+			BUG_ON((insn.disp >> 31) != (insn.disp >> 7));
+		}
+	}
+
+	return &insn.text;
+}
+
+extern int after_bootmem;
+extern __ro_after_init struct mm_struct *poking_mm;
+extern __ro_after_init unsigned long poking_addr;
+
+#ifndef CONFIG_UML_X86
+static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
+{
+	regs->ip = ip;
+}
+
 static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)
 {
 	/*
@@ -85,6 +130,9 @@ static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)
 	 * stack where the break point happened, and the saving of
 	 * pt_regs. We can extend the original stack because of
 	 * this gap. See the idtentry macro's create_gap option.
+	 *
+	 * Similarly entry_32.S will have a gap on the stack for (any) hardware
+	 * exception and pt_regs; see FIXUP_FRAME.
	 */
	regs->sp -= sizeof(unsigned long);
	*(unsigned long *)regs->sp = val;
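text_gen_insn() above is the one helper that ftrace, jump_label and optprobes now share for building a relative CALL/JMP/INT3 image. A user-space sketch of the same encoding (illustrative, not the kernel header; the key detail is that x86 relative displacements count from the end of the instruction, hence addr + size):

/* User-space sketch of the text_gen_insn() encoding; illustrative only. */
#include <stdint.h>
#include <stdio.h>

union insn {				/* mirrors union text_poke_insn */
	uint8_t text[5];
	struct __attribute__((packed)) {
		uint8_t opcode;
		int32_t disp;
	};
};

static union insn gen(uint8_t opcode, uintptr_t addr, uintptr_t dest, int size)
{
	union insn i;

	i.opcode = opcode;
	/* relative CALL/JMP displacements count from the instruction's end */
	i.disp = (int32_t)(dest - (addr + size));
	return i;
}

int main(void)
{
	union insn i = gen(0xE8 /* CALL */, 0x1000, 0x2000, 5);

	for (int n = 0; n < 5; n++)
		printf("%02x ", i.text[n]);	/* e8 fb 0f 00 00 */
	printf("\n");
	return 0;
}

Note the "per instance" static buffer in the real helper: because it is __always_inline, each call site gets its own copy, so the returned pointer stays valid until that site is reached again.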
*/ - on_each_cpu(do_sync_core, NULL, 1); + text_poke_sync(); } /* @@ -1098,19 +1123,25 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) if (tp[i].text[0] == INT3_INSN_OPCODE) continue; - text_poke(tp[i].addr, tp[i].text, sizeof(int3)); + text_poke(text_poke_addr(&tp[i]), tp[i].text, INT3_INSN_SIZE); do_sync++; } if (do_sync) - on_each_cpu(do_sync_core, NULL, 1); + text_poke_sync(); /* * sync_core() implies an smp_mb() and orders this store against * the writing of the new instruction. */ - bp_patching.vec = NULL; bp_patching.nr_entries = 0; + /* + * This sync_core () call ensures that all INT3 handlers in progress + * have finished. This allows poke_int3_handler() after this to + * avoid touching bp_paching.vec by checking nr_entries == 0. + */ + text_poke_sync(); + bp_patching.vec = NULL; } void text_poke_loc_init(struct text_poke_loc *tp, void *addr, @@ -1118,11 +1149,7 @@ void text_poke_loc_init(struct text_poke_loc *tp, void *addr, { struct insn insn; - if (!opcode) - opcode = (void *)tp->text; - else - memcpy((void *)tp->text, opcode, len); - + memcpy((void *)tp->text, opcode, len); if (!emulate) emulate = opcode; @@ -1132,8 +1159,7 @@ void text_poke_loc_init(struct text_poke_loc *tp, void *addr, BUG_ON(!insn_complete(&insn)); BUG_ON(len != insn.length); - tp->addr = addr; - tp->len = len; + tp->rel_addr = addr - (void *)_stext; tp->opcode = insn.opcode.bytes[0]; switch (tp->opcode) { @@ -1167,6 +1193,55 @@ void text_poke_loc_init(struct text_poke_loc *tp, void *addr, } } +/* + * We hard rely on the tp_vec being ordered; ensure this is so by flushing + * early if needed. + */ +static bool tp_order_fail(void *addr) +{ + struct text_poke_loc *tp; + + if (!tp_vec_nr) + return false; + + if (!addr) /* force */ + return true; + + tp = &tp_vec[tp_vec_nr - 1]; + if ((unsigned long)text_poke_addr(tp) > (unsigned long)addr) + return true; + + return false; +} + +static void text_poke_flush(void *addr) +{ + if (tp_vec_nr == TP_VEC_MAX || tp_order_fail(addr)) { + text_poke_bp_batch(tp_vec, tp_vec_nr); + tp_vec_nr = 0; + } +} + +void text_poke_finish(void) +{ + text_poke_flush(NULL); +} + +void __ref text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate) +{ + struct text_poke_loc *tp; + + if (unlikely(system_state == SYSTEM_BOOTING)) { + text_poke_early(addr, opcode, len); + return; + } + + text_poke_flush(addr); + + tp = &tp_vec[tp_vec_nr++]; + text_poke_loc_init(tp, addr, opcode, len, emulate); +} + /** * text_poke_bp() -- update instructions on live kernel on SMP * @addr: address to patch @@ -1178,10 +1253,15 @@ void text_poke_loc_init(struct text_poke_loc *tp, void *addr, * dynamically allocated memory. This function should be used when it is * not possible to allocate memory. 
*/ -void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate) +void __ref text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate) { struct text_poke_loc tp; + if (unlikely(system_state == SYSTEM_BOOTING)) { + text_poke_early(addr, opcode, len); + return; + } + text_poke_loc_init(&tp, addr, opcode, len, emulate); text_poke_bp_batch(&tp, 1); } diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 024c3053dbba..108ee96f8b66 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -34,6 +34,8 @@ #ifdef CONFIG_DYNAMIC_FTRACE +static int ftrace_poke_late = 0; + int ftrace_arch_code_modify_prepare(void) __acquires(&text_mutex) { @@ -43,84 +45,37 @@ int ftrace_arch_code_modify_prepare(void) * ftrace has it set to "read/write". */ mutex_lock(&text_mutex); - set_kernel_text_rw(); - set_all_modules_text_rw(); + ftrace_poke_late = 1; return 0; } int ftrace_arch_code_modify_post_process(void) __releases(&text_mutex) { - set_all_modules_text_ro(); - set_kernel_text_ro(); + /* + * ftrace_make_{call,nop}() may be called during + * module load, and we need to finish the text_poke_queue() + * that they do, here. + */ + text_poke_finish(); + ftrace_poke_late = 0; mutex_unlock(&text_mutex); return 0; } -union ftrace_code_union { - char code[MCOUNT_INSN_SIZE]; - struct { - unsigned char op; - int offset; - } __attribute__((packed)); -}; - -static int ftrace_calc_offset(long ip, long addr) -{ - return (int)(addr - ip); -} - -static unsigned char * -ftrace_text_replace(unsigned char op, unsigned long ip, unsigned long addr) +static const char *ftrace_nop_replace(void) { - static union ftrace_code_union calc; - - calc.op = op; - calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr); - - return calc.code; -} - -static unsigned char * -ftrace_call_replace(unsigned long ip, unsigned long addr) -{ - return ftrace_text_replace(0xe8, ip, addr); -} - -static inline int -within(unsigned long addr, unsigned long start, unsigned long end) -{ - return addr >= start && addr < end; + return ideal_nops[NOP_ATOMIC5]; } -static unsigned long text_ip_addr(unsigned long ip) +static const char *ftrace_call_replace(unsigned long ip, unsigned long addr) { - /* - * On x86_64, kernel text mappings are mapped read-only, so we use - * the kernel identity mapping instead of the kernel text mapping - * to modify the kernel text. - * - * For 32bit kernels, these mappings are same and we can use - * kernel identity mapping to modify code. - */ - if (within(ip, (unsigned long)_text, (unsigned long)_etext)) - ip = (unsigned long)__va(__pa_symbol(ip)); - - return ip; + return text_gen_insn(CALL_INSN_OPCODE, (void *)ip, (void *)addr); } -static const unsigned char *ftrace_nop_replace(void) +static int ftrace_verify_code(unsigned long ip, const char *old_code) { - return ideal_nops[NOP_ATOMIC5]; -} - -static int -ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code, - unsigned const char *new_code) -{ - unsigned char replaced[MCOUNT_INSN_SIZE]; - - ftrace_expected = old_code; + char cur_code[MCOUNT_INSN_SIZE]; /* * Note: @@ -129,31 +84,46 @@ ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code, * Carefully read and modify the code with probe_kernel_*(), and make * sure what we read is what we expected it to be before modifying it. 
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 024c3053dbba..108ee96f8b66 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -34,6 +34,8 @@
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 
+static int ftrace_poke_late = 0;
+
 int ftrace_arch_code_modify_prepare(void)
     __acquires(&text_mutex)
 {
@@ -43,84 +45,37 @@ int ftrace_arch_code_modify_prepare(void)
 	 * ftrace has it set to "read/write".
	 */
 	mutex_lock(&text_mutex);
-	set_kernel_text_rw();
-	set_all_modules_text_rw();
+	ftrace_poke_late = 1;
 	return 0;
 }
 
 int ftrace_arch_code_modify_post_process(void)
     __releases(&text_mutex)
 {
-	set_all_modules_text_ro();
-	set_kernel_text_ro();
+	/*
+	 * ftrace_make_{call,nop}() may be called during
+	 * module load, and we need to finish the text_poke_queue()
+	 * that they do, here.
+	 */
+	text_poke_finish();
+	ftrace_poke_late = 0;
 	mutex_unlock(&text_mutex);
 	return 0;
 }
 
-union ftrace_code_union {
-	char code[MCOUNT_INSN_SIZE];
-	struct {
-		unsigned char op;
-		int offset;
-	} __attribute__((packed));
-};
-
-static int ftrace_calc_offset(long ip, long addr)
-{
-	return (int)(addr - ip);
-}
-
-static unsigned char *
-ftrace_text_replace(unsigned char op, unsigned long ip, unsigned long addr)
+static const char *ftrace_nop_replace(void)
 {
-	static union ftrace_code_union calc;
-
-	calc.op		= op;
-	calc.offset	= ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);
-
-	return calc.code;
-}
-
-static unsigned char *
-ftrace_call_replace(unsigned long ip, unsigned long addr)
-{
-	return ftrace_text_replace(0xe8, ip, addr);
-}
-
-static inline int
-within(unsigned long addr, unsigned long start, unsigned long end)
-{
-	return addr >= start && addr < end;
+	return ideal_nops[NOP_ATOMIC5];
 }
 
-static unsigned long text_ip_addr(unsigned long ip)
+static const char *ftrace_call_replace(unsigned long ip, unsigned long addr)
 {
-	/*
-	 * On x86_64, kernel text mappings are mapped read-only, so we use
-	 * the kernel identity mapping instead of the kernel text mapping
-	 * to modify the kernel text.
-	 *
-	 * For 32bit kernels, these mappings are same and we can use
-	 * kernel identity mapping to modify code.
-	 */
-	if (within(ip, (unsigned long)_text, (unsigned long)_etext))
-		ip = (unsigned long)__va(__pa_symbol(ip));
-
-	return ip;
+	return text_gen_insn(CALL_INSN_OPCODE, (void *)ip, (void *)addr);
 }
 
-static const unsigned char *ftrace_nop_replace(void)
+static int ftrace_verify_code(unsigned long ip, const char *old_code)
 {
-	return ideal_nops[NOP_ATOMIC5];
-}
-
-static int
-ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
-		   unsigned const char *new_code)
-{
-	unsigned char replaced[MCOUNT_INSN_SIZE];
-
-	ftrace_expected = old_code;
+	char cur_code[MCOUNT_INSN_SIZE];
 
 	/*
 	 * Note:
@@ -129,31 +84,46 @@ ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
 	 * Carefully read and modify the code with probe_kernel_*(), and make
 	 * sure what we read is what we expected it to be before modifying it.
	 */
-	/* read the text we want to modify */
-	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+	if (probe_kernel_read(cur_code, (void *)ip, MCOUNT_INSN_SIZE)) {
+		WARN_ON(1);
 		return -EFAULT;
+	}
 
 	/* Make sure it is what we expect it to be */
-	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
+	if (memcmp(cur_code, old_code, MCOUNT_INSN_SIZE) != 0) {
+		WARN_ON(1);
 		return -EINVAL;
+	}
 
-	ip = text_ip_addr(ip);
-
-	/* replace the text with the new text */
-	if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
-		return -EPERM;
+	return 0;
+}
 
-	sync_core();
+/*
+ * Marked __ref because it calls text_poke_early() which is .init.text. That is
+ * ok because that call will happen early, during boot, when .init sections are
+ * still present.
+ */
+static int __ref
+ftrace_modify_code_direct(unsigned long ip, const char *old_code,
+			  const char *new_code)
+{
+	int ret = ftrace_verify_code(ip, old_code);
+	if (ret)
+		return ret;
 
+	/* replace the text with the new text */
+	if (ftrace_poke_late)
+		text_poke_queue((void *)ip, new_code, MCOUNT_INSN_SIZE, NULL);
+	else
+		text_poke_early((void *)ip, new_code, MCOUNT_INSN_SIZE);
 	return 0;
 }
 
-int ftrace_make_nop(struct module *mod,
-		    struct dyn_ftrace *rec, unsigned long addr)
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)
 {
-	unsigned const char *new, *old;
 	unsigned long ip = rec->ip;
+	const char *new, *old;
 
 	old = ftrace_call_replace(ip, addr);
 	new = ftrace_nop_replace();
@@ -167,19 +137,20 @@ int ftrace_make_nop(struct module *mod,
 	 * just modify the code directly.
	 */
 	if (addr == MCOUNT_ADDR)
-		return ftrace_modify_code_direct(rec->ip, old, new);
+		return ftrace_modify_code_direct(ip, old, new);
 
-	ftrace_expected = NULL;
-
-	/* Normal cases use add_brk_on_nop */
+	/*
+	 * x86 overrides ftrace_replace_code -- this function will never be used
+	 * in this case.
+	 */
 	WARN_ONCE(1, "invalid use of ftrace_make_nop");
 	return -EINVAL;
 }
 
 int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 {
-	unsigned const char *new, *old;
 	unsigned long ip = rec->ip;
+	const char *new, *old;
 
 	old = ftrace_nop_replace();
 	new = ftrace_call_replace(ip, addr);
@@ -189,43 +160,6 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 }
 
 /*
- * The modifying_ftrace_code is used to tell the breakpoint
- * handler to call ftrace_int3_handler(). If it fails to
- * call this handler for a breakpoint added by ftrace, then
- * the kernel may crash.
- *
- * As atomic_writes on x86 do not need a barrier, we do not
- * need to add smp_mb()s for this to work. It is also considered
- * that we can not read the modifying_ftrace_code before
- * executing the breakpoint. That would be quite remarkable if
- * it could do that. Here's the flow that is required:
- *
- *   CPU-0                          CPU-1
- *
- *   atomic_inc(mfc);
- *   write int3s
- *				<trap-int3> // implicit (r)mb
- *				if (atomic_read(mfc))
- *					call ftrace_int3_handler()
- *
- * Then when we are finished:
- *
- *   atomic_dec(mfc);
- *
- * If we hit a breakpoint that was not set by ftrace, it does not
- * matter if ftrace_int3_handler() is called or not. It will
- * simply be ignored. But it is crucial that a ftrace nop/caller
- * breakpoint is handled. No other user should ever place a
- * breakpoint on an ftrace nop/caller location. It must only
- * be done by this code.
- */
-atomic_t modifying_ftrace_code __read_mostly;
-
-static int
-ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
-		   unsigned const char *new_code);
-
-/*
 * Should never be called:
 *  As it is only called by __ftrace_replace_code() which is called by
 *  ftrace_replace_code() that x86 overrides, and by ftrace_update_code()
@@ -237,452 +171,84 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
				 unsigned long addr)
 {
 	WARN_ON(1);
-	ftrace_expected = NULL;
 	return -EINVAL;
 }
 
-static unsigned long ftrace_update_func;
-static unsigned long ftrace_update_func_call;
-
-static int update_ftrace_func(unsigned long ip, void *new)
-{
-	unsigned char old[MCOUNT_INSN_SIZE];
-	int ret;
-
-	memcpy(old, (void *)ip, MCOUNT_INSN_SIZE);
-
-	ftrace_update_func = ip;
-	/* Make sure the breakpoints see the ftrace_update_func update */
-	smp_wmb();
-
-	/* See comment above by declaration of modifying_ftrace_code */
-	atomic_inc(&modifying_ftrace_code);
-
-	ret = ftrace_modify_code(ip, old, new);
-
-	atomic_dec(&modifying_ftrace_code);
-
-	return ret;
-}
-
 int ftrace_update_ftrace_func(ftrace_func_t func)
 {
-	unsigned long ip = (unsigned long)(&ftrace_call);
-	unsigned char *new;
-	int ret;
-
-	ftrace_update_func_call = (unsigned long)func;
-
-	new = ftrace_call_replace(ip, (unsigned long)func);
-	ret = update_ftrace_func(ip, new);
-
-	/* Also update the regs callback function */
-	if (!ret) {
-		ip = (unsigned long)(&ftrace_regs_call);
-		new = ftrace_call_replace(ip, (unsigned long)func);
-		ret = update_ftrace_func(ip, new);
-	}
-
-	return ret;
-}
-
-static nokprobe_inline int is_ftrace_caller(unsigned long ip)
-{
-	if (ip == ftrace_update_func)
-		return 1;
-
-	return 0;
-}
-
-/*
- * A breakpoint was added to the code address we are about to
- * modify, and this is the handle that will just skip over it.
- * We are either changing a nop into a trace call, or a trace
- * call to a nop. While the change is taking place, we treat
- * it just like it was a nop.
- */
-int ftrace_int3_handler(struct pt_regs *regs)
-{
 	unsigned long ip;
+	const char *new;
 
-	if (WARN_ON_ONCE(!regs))
-		return 0;
-
-	ip = regs->ip - INT3_INSN_SIZE;
-
-	if (ftrace_location(ip)) {
-		int3_emulate_call(regs, (unsigned long)ftrace_regs_caller);
-		return 1;
-	} else if (is_ftrace_caller(ip)) {
-		if (!ftrace_update_func_call) {
-			int3_emulate_jmp(regs, ip + CALL_INSN_SIZE);
-			return 1;
-		}
-		int3_emulate_call(regs, ftrace_update_func_call);
-		return 1;
-	}
-
-	return 0;
-}
-NOKPROBE_SYMBOL(ftrace_int3_handler);
-
-static int ftrace_write(unsigned long ip, const char *val, int size)
-{
-	ip = text_ip_addr(ip);
-
-	if (probe_kernel_write((void *)ip, val, size))
-		return -EPERM;
-
-	return 0;
-}
-
-static int add_break(unsigned long ip, const char *old)
-{
-	unsigned char replaced[MCOUNT_INSN_SIZE];
-	unsigned char brk = BREAKPOINT_INSTRUCTION;
-
-	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
-		return -EFAULT;
-
-	ftrace_expected = old;
-
-	/* Make sure it is what we expect it to be */
-	if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0)
-		return -EINVAL;
-
-	return ftrace_write(ip, &brk, 1);
-}
-
-static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr)
-{
-	unsigned const char *old;
-	unsigned long ip = rec->ip;
-
-	old = ftrace_call_replace(ip, addr);
-
-	return add_break(rec->ip, old);
-}
-
-
-static int add_brk_on_nop(struct dyn_ftrace *rec)
-{
-	unsigned const char *old;
-
-	old = ftrace_nop_replace();
-
-	return add_break(rec->ip, old);
-}
-
-static int add_breakpoints(struct dyn_ftrace *rec, bool enable)
-{
-	unsigned long ftrace_addr;
-	int ret;
-
-	ftrace_addr = ftrace_get_addr_curr(rec);
-
-	ret = ftrace_test_record(rec, enable);
-
-	switch (ret) {
-	case FTRACE_UPDATE_IGNORE:
-		return 0;
-
-	case FTRACE_UPDATE_MAKE_CALL:
-		/* converting nop to call */
-		return add_brk_on_nop(rec);
-
-	case FTRACE_UPDATE_MODIFY_CALL:
-	case FTRACE_UPDATE_MAKE_NOP:
-		/* converting a call to a nop */
-		return add_brk_on_call(rec, ftrace_addr);
-	}
-	return 0;
-}
-
-/*
- * On error, we need to remove breakpoints. This needs to
- * be done caefully. If the address does not currently have a
- * breakpoint, we know we are done. Otherwise, we look at the
- * remaining 4 bytes of the instruction. If it matches a nop
- * we replace the breakpoint with the nop. Otherwise we replace
- * it with the call instruction.
- */
-static int remove_breakpoint(struct dyn_ftrace *rec)
-{
-	unsigned char ins[MCOUNT_INSN_SIZE];
-	unsigned char brk = BREAKPOINT_INSTRUCTION;
-	const unsigned char *nop;
-	unsigned long ftrace_addr;
-	unsigned long ip = rec->ip;
-
-	/* If we fail the read, just give up */
-	if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE))
-		return -EFAULT;
-
-	/* If this does not have a breakpoint, we are done */
-	if (ins[0] != brk)
-		return 0;
-
-	nop = ftrace_nop_replace();
-
-	/*
-	 * If the last 4 bytes of the instruction do not match
-	 * a nop, then we assume that this is a call to ftrace_addr.
-	 */
-	if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) {
-		/*
-		 * For extra paranoidism, we check if the breakpoint is on
-		 * a call that would actually jump to the ftrace_addr.
-		 * If not, don't touch the breakpoint, we make just create
-		 * a disaster.
-		 */
-		ftrace_addr = ftrace_get_addr_new(rec);
-		nop = ftrace_call_replace(ip, ftrace_addr);
-
-		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0)
-			goto update;
-
-		/* Check both ftrace_addr and ftrace_old_addr */
-		ftrace_addr = ftrace_get_addr_curr(rec);
-		nop = ftrace_call_replace(ip, ftrace_addr);
-
-		ftrace_expected = nop;
-
-		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
-			return -EINVAL;
-	}
-
- update:
-	return ftrace_write(ip, nop, 1);
-}
-
-static int add_update_code(unsigned long ip, unsigned const char *new)
-{
-	/* skip breakpoint */
-	ip++;
-	new++;
-	return ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1);
-}
-
-static int add_update_call(struct dyn_ftrace *rec, unsigned long addr)
-{
-	unsigned long ip = rec->ip;
-	unsigned const char *new;
-
-	new = ftrace_call_replace(ip, addr);
-	return add_update_code(ip, new);
-}
-
-static int add_update_nop(struct dyn_ftrace *rec)
-{
-	unsigned long ip = rec->ip;
-	unsigned const char *new;
-
-	new = ftrace_nop_replace();
-	return add_update_code(ip, new);
-}
-
-static int add_update(struct dyn_ftrace *rec, bool enable)
-{
-	unsigned long ftrace_addr;
-	int ret;
-
-	ret = ftrace_test_record(rec, enable);
-
-	ftrace_addr = ftrace_get_addr_new(rec);
-
-	switch (ret) {
-	case FTRACE_UPDATE_IGNORE:
-		return 0;
-
-	case FTRACE_UPDATE_MODIFY_CALL:
-	case FTRACE_UPDATE_MAKE_CALL:
-		/* converting nop to call */
-		return add_update_call(rec, ftrace_addr);
-
-	case FTRACE_UPDATE_MAKE_NOP:
-		/* converting a call to a nop */
-		return add_update_nop(rec);
-	}
-
-	return 0;
-}
-
-static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr)
-{
-	unsigned long ip = rec->ip;
-	unsigned const char *new;
-
-	new = ftrace_call_replace(ip, addr);
-
-	return ftrace_write(ip, new, 1);
-}
-
-static int finish_update_nop(struct dyn_ftrace *rec)
-{
-	unsigned long ip = rec->ip;
-	unsigned const char *new;
-
-	new = ftrace_nop_replace();
-
-	return ftrace_write(ip, new, 1);
-}
-
-static int finish_update(struct dyn_ftrace *rec, bool enable)
-{
-	unsigned long ftrace_addr;
-	int ret;
-
-	ret = ftrace_update_record(rec, enable);
-
-	ftrace_addr = ftrace_get_addr_new(rec);
-
-	switch (ret) {
-	case FTRACE_UPDATE_IGNORE:
-		return 0;
-
-	case FTRACE_UPDATE_MODIFY_CALL:
-	case FTRACE_UPDATE_MAKE_CALL:
-		/* converting nop to call */
-		return finish_update_call(rec, ftrace_addr);
+	ip = (unsigned long)(&ftrace_call);
+	new = ftrace_call_replace(ip, (unsigned long)func);
+	text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
 
-	case FTRACE_UPDATE_MAKE_NOP:
-		/* converting a call to a nop */
-		return finish_update_nop(rec);
-	}
+	ip = (unsigned long)(&ftrace_regs_call);
+	new = ftrace_call_replace(ip, (unsigned long)func);
+	text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
 
 	return 0;
 }
 
-static void do_sync_core(void *data)
-{
-	sync_core();
-}
-
-static void run_sync(void)
-{
-	int enable_irqs;
-
-	/* No need to sync if there's only one CPU */
-	if (num_online_cpus() == 1)
-		return;
-
-	enable_irqs = irqs_disabled();
-
-	/* We may be called with interrupts disabled (on bootup). */
-	if (enable_irqs)
-		local_irq_enable();
-	on_each_cpu(do_sync_core, NULL, 1);
-	if (enable_irqs)
-		local_irq_disable();
-}
-
 void ftrace_replace_code(int enable)
 {
 	struct ftrace_rec_iter *iter;
 	struct dyn_ftrace *rec;
-	const char *report = "adding breakpoints";
-	int count = 0;
+	const char *new, *old;
 	int ret;
 
 	for_ftrace_rec_iter(iter) {
 		rec = ftrace_rec_iter_record(iter);
 
-		ret = add_breakpoints(rec, enable);
-		if (ret)
-			goto remove_breakpoints;
-		count++;
-	}
-
-	run_sync();
+		switch (ftrace_test_record(rec, enable)) {
+		case FTRACE_UPDATE_IGNORE:
+		default:
+			continue;
 
-	report = "updating code";
-	count = 0;
+		case FTRACE_UPDATE_MAKE_CALL:
+			old = ftrace_nop_replace();
+			break;
 
-	for_ftrace_rec_iter(iter) {
-		rec = ftrace_rec_iter_record(iter);
+		case FTRACE_UPDATE_MODIFY_CALL:
+		case FTRACE_UPDATE_MAKE_NOP:
+			old = ftrace_call_replace(rec->ip, ftrace_get_addr_curr(rec));
+			break;
+		}
 
-		ret = add_update(rec, enable);
-		if (ret)
-			goto remove_breakpoints;
-		count++;
+		ret = ftrace_verify_code(rec->ip, old);
+		if (ret) {
+			ftrace_bug(ret, rec);
+			return;
+		}
 	}
 
-	run_sync();
-
-	report = "removing breakpoints";
-	count = 0;
-
 	for_ftrace_rec_iter(iter) {
 		rec = ftrace_rec_iter_record(iter);
 
-		ret = finish_update(rec, enable);
-		if (ret)
-			goto remove_breakpoints;
-		count++;
-	}
+		switch (ftrace_test_record(rec, enable)) {
+		case FTRACE_UPDATE_IGNORE:
+		default:
+			continue;
 
-	run_sync();
+		case FTRACE_UPDATE_MAKE_CALL:
+		case FTRACE_UPDATE_MODIFY_CALL:
+			new = ftrace_call_replace(rec->ip, ftrace_get_addr_new(rec));
+			break;
 
-	return;
+		case FTRACE_UPDATE_MAKE_NOP:
+			new = ftrace_nop_replace();
+			break;
+		}
 
- remove_breakpoints:
-	pr_warn("Failed on %s (%d):\n", report, count);
-	ftrace_bug(ret, rec);
-	for_ftrace_rec_iter(iter) {
-		rec = ftrace_rec_iter_record(iter);
-		/*
-		 * Breakpoints are handled only when this function is in
-		 * progress. The system could not work with them.
-		 */
-		if (remove_breakpoint(rec))
-			BUG();
+		text_poke_queue((void *)rec->ip, new, MCOUNT_INSN_SIZE, NULL);
+		ftrace_update_record(rec, enable);
 	}
-	run_sync();
-}
-
-static int
-ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
-		   unsigned const char *new_code)
-{
-	int ret;
-
-	ret = add_break(ip, old_code);
-	if (ret)
-		goto out;
-
-	run_sync();
-
-	ret = add_update_code(ip, new_code);
-	if (ret)
-		goto fail_update;
-
-	run_sync();
-
-	ret = ftrace_write(ip, new_code, 1);
-	/*
-	 * The breakpoint is handled only when this function is in progress.
-	 * The system could not work if we could not remove it.
-	 */
-	BUG_ON(ret);
- out:
-	run_sync();
-	return ret;
-
- fail_update:
-	/* Also here the system could not work with the breakpoint */
-	if (ftrace_write(ip, old_code, 1))
-		BUG();
-	goto out;
+	text_poke_finish();
 }
 
 void arch_ftrace_update_code(int command)
 {
-	/* See comment above by declaration of modifying_ftrace_code */
-	atomic_inc(&modifying_ftrace_code);
-
 	ftrace_modify_all_code(command);
-
-	atomic_dec(&modifying_ftrace_code);
 }
 
 int __init ftrace_dyn_arch_init(void)
@@ -747,6 +313,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
 	unsigned long start_offset;
 	unsigned long end_offset;
 	unsigned long op_offset;
+	unsigned long call_offset;
 	unsigned long offset;
 	unsigned long npages;
 	unsigned long size;
@@ -763,10 +330,12 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
 		start_offset = (unsigned long)ftrace_regs_caller;
 		end_offset = (unsigned long)ftrace_regs_caller_end;
 		op_offset = (unsigned long)ftrace_regs_caller_op_ptr;
+		call_offset = (unsigned long)ftrace_regs_call;
 	} else {
 		start_offset = (unsigned long)ftrace_caller;
 		end_offset = (unsigned long)ftrace_epilogue;
 		op_offset = (unsigned long)ftrace_caller_op_ptr;
+		call_offset = (unsigned long)ftrace_call;
 	}
 
 	size = end_offset - start_offset;
@@ -823,16 +392,21 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
 	/* put in the new offset to the ftrace_ops */
 	memcpy(trampoline + op_offset, &op_ptr, OP_REF_SIZE);
 
+	/* put in the call to the function */
+	mutex_lock(&text_mutex);
+	call_offset -= start_offset;
+	memcpy(trampoline + call_offset,
+	       text_gen_insn(CALL_INSN_OPCODE,
+			     trampoline + call_offset,
+			     ftrace_ops_get_func(ops)), CALL_INSN_SIZE);
+	mutex_unlock(&text_mutex);
+
 	/* ALLOC_TRAMP flags lets us know we created it */
 	ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP;
 
 	set_vm_flush_reset_perms(trampoline);
 
-	/*
-	 * Module allocation needs to be completed by making the page
-	 * executable. The page is still writable, which is a security hazard,
-	 * but anyhow ftrace breaks W^X completely.
-	 */
+	set_memory_ro((unsigned long)trampoline, npages);
 	set_memory_x((unsigned long)trampoline, npages);
 	return (unsigned long)trampoline;
 fail:
@@ -859,62 +433,54 @@ static unsigned long calc_trampoline_call_offset(bool save_regs)
 void arch_ftrace_update_trampoline(struct ftrace_ops *ops)
 {
 	ftrace_func_t func;
-	unsigned char *new;
 	unsigned long offset;
 	unsigned long ip;
 	unsigned int size;
-	int ret, npages;
+	const char *new;
 
-	if (ops->trampoline) {
-		/*
-		 * The ftrace_ops caller may set up its own trampoline.
-		 * In such a case, this code must not modify it.
-		 */
-		if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
-			return;
-		npages = PAGE_ALIGN(ops->trampoline_size) >> PAGE_SHIFT;
-		set_memory_rw(ops->trampoline, npages);
-	} else {
+	if (!ops->trampoline) {
 		ops->trampoline = create_trampoline(ops, &size);
 		if (!ops->trampoline)
 			return;
 		ops->trampoline_size = size;
-		npages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+		return;
 	}
 
+	/*
+	 * The ftrace_ops caller may set up its own trampoline.
+	 * In such a case, this code must not modify it.
	 */
+	if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
+		return;
+
 	offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS);
 	ip = ops->trampoline + offset;
-
 	func = ftrace_ops_get_func(ops);
 
-	ftrace_update_func_call = (unsigned long)func;
-
+	mutex_lock(&text_mutex);
 	/* Do a safe modify in case the trampoline is executing */
 	new = ftrace_call_replace(ip, (unsigned long)func);
-	ret = update_ftrace_func(ip, new);
-	set_memory_ro(ops->trampoline, npages);
-
-	/* The update should never fail */
-	WARN_ON(ret);
+	text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
+	mutex_unlock(&text_mutex);
 }
 
 /* Return the address of the function the trampoline calls */
 static void *addr_from_call(void *ptr)
 {
-	union ftrace_code_union calc;
+	union text_poke_insn call;
 	int ret;
 
-	ret = probe_kernel_read(&calc, ptr, MCOUNT_INSN_SIZE);
+	ret = probe_kernel_read(&call, ptr, CALL_INSN_SIZE);
 	if (WARN_ON_ONCE(ret < 0))
 		return NULL;
 
 	/* Make sure this is a call */
-	if (WARN_ON_ONCE(calc.op != 0xe8)) {
-		pr_warn("Expected e8, got %x\n", calc.op);
+	if (WARN_ON_ONCE(call.opcode != CALL_INSN_OPCODE)) {
+		pr_warn("Expected E8, got %x\n", call.opcode);
 		return NULL;
 	}
 
-	return ptr + MCOUNT_INSN_SIZE + calc.offset;
+	return ptr + CALL_INSN_SIZE + call.disp;
 }
 
 void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
@@ -981,19 +547,18 @@ void arch_ftrace_trampoline_free(struct ftrace_ops *ops)
 #ifdef CONFIG_DYNAMIC_FTRACE
 extern void ftrace_graph_call(void);
 
-static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)
+static const char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)
 {
-	return ftrace_text_replace(0xe9, ip, addr);
+	return text_gen_insn(JMP32_INSN_OPCODE, (void *)ip, (void *)addr);
 }
 
 static int ftrace_mod_jmp(unsigned long ip, void *func)
 {
-	unsigned char *new;
+	const char *new;
 
-	ftrace_update_func_call = 0UL;
 	new = ftrace_jmp_replace(ip, (unsigned long)func);
-
-	return update_ftrace_func(ip, new);
+	text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
+	return 0;
 }
 
 int ftrace_enable_ftrace_graph_caller(void)
@@ -1019,10 +584,9 @@ int ftrace_disable_ftrace_graph_caller(void)
 void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
			   unsigned long frame_pointer)
 {
+	unsigned long return_hooker = (unsigned long)&return_to_handler;
 	unsigned long old;
 	int faulted;
-	unsigned long return_hooker = (unsigned long)
-				&return_to_handler;
 
 	/*
	 * When resuming from suspend-to-ram, this function can be indirectly
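The rewritten ftrace_replace_code() above is now two passes over the records: verify the expected old bytes everywhere first, then queue every new instruction and apply them as one batch via text_poke_finish(). A toy model of that flow (user-space C, illustrative names):

/* Toy model of the two-pass ftrace_replace_code() flow; illustrative names. */
#include <stdio.h>
#include <string.h>

#define INSN 5

struct site { unsigned char code[INSN]; };

static int replace_all(struct site *s, int n,
		       const unsigned char *old, const unsigned char *new)
{
	for (int i = 0; i < n; i++)		/* pass 1: verify every site */
		if (memcmp(s[i].code, old, INSN))
			return -1;		/* ftrace_bug() in the kernel */
	for (int i = 0; i < n; i++)		/* pass 2: queue every update */
		memcpy(s[i].code, new, INSN);	/* text_poke_queue() stand-in */
	return 0;				/* then one text_poke_finish() */
}

int main(void)
{
	const unsigned char nop[INSN]  = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };
	const unsigned char call[INSN] = { 0xe8, 0x01, 0x02, 0x03, 0x04 };
	struct site s[2] = {
		{ { 0x0f, 0x1f, 0x44, 0x00, 0x00 } },
		{ { 0x0f, 0x1f, 0x44, 0x00, 0x00 } },
	};

	printf("%d\n", replace_all(s, 2, nop, call));
	return 0;
}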
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index c1a8b9e71408..9c4498ea0b3c 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -16,15 +16,7 @@
 #include <asm/alternative.h>
 #include <asm/text-patching.h>
 
-union jump_code_union {
-	char code[JUMP_LABEL_NOP_SIZE];
-	struct {
-		char jump;
-		int offset;
-	} __attribute__((packed));
-};
-
-static void bug_at(unsigned char *ip, int line)
+static void bug_at(const void *ip, int line)
 {
 	/*
	 * The location is not an op that we were expecting.
@@ -35,42 +27,42 @@ static void bug_at(unsigned char *ip, int line)
 	BUG();
 }
 
-static void __jump_label_set_jump_code(struct jump_entry *entry,
-				       enum jump_label_type type,
-				       union jump_code_union *code,
-				       int init)
+static const void *
+__jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type, int init)
 {
 	const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
 	const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
-	const void *expect;
+	const void *expect, *code;
+	const void *addr, *dest;
 	int line;
 
-	code->jump = 0xe9;
-	code->offset = jump_entry_target(entry) -
-		       (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
+	addr = (void *)jump_entry_code(entry);
+	dest = (void *)jump_entry_target(entry);
+
+	code = text_gen_insn(JMP32_INSN_OPCODE, addr, dest);
 
 	if (init) {
 		expect = default_nop; line = __LINE__;
 	} else if (type == JUMP_LABEL_JMP) {
 		expect = ideal_nop; line = __LINE__;
 	} else {
-		expect = code->code; line = __LINE__;
+		expect = code; line = __LINE__;
 	}
 
-	if (memcmp((void *)jump_entry_code(entry), expect, JUMP_LABEL_NOP_SIZE))
-		bug_at((void *)jump_entry_code(entry), line);
+	if (memcmp(addr, expect, JUMP_LABEL_NOP_SIZE))
+		bug_at(addr, line);
 
 	if (type == JUMP_LABEL_NOP)
-		memcpy(code, ideal_nop, JUMP_LABEL_NOP_SIZE);
+		code = ideal_nop;
+
+	return code;
 }
 
-static void __ref __jump_label_transform(struct jump_entry *entry,
-					 enum jump_label_type type,
-					 int init)
+static void inline __jump_label_transform(struct jump_entry *entry,
+					  enum jump_label_type type,
+					  int init)
 {
-	union jump_code_union code;
-
-	__jump_label_set_jump_code(entry, type, &code, init);
+	const void *opcode = __jump_label_set_jump_code(entry, type, init);
 
 	/*
	 * As long as only a single processor is running and the code is still
@@ -84,31 +76,33 @@ static void __ref __jump_label_transform(struct jump_entry *entry,
	 * always nop being the 'currently valid' instruction
	 */
 	if (init || system_state == SYSTEM_BOOTING) {
-		text_poke_early((void *)jump_entry_code(entry), &code,
+		text_poke_early((void *)jump_entry_code(entry), opcode,
				JUMP_LABEL_NOP_SIZE);
 		return;
 	}
 
-	text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE, NULL);
+	text_poke_bp((void *)jump_entry_code(entry), opcode, JUMP_LABEL_NOP_SIZE, NULL);
 }
 
-void arch_jump_label_transform(struct jump_entry *entry,
-			       enum jump_label_type type)
+static void __ref jump_label_transform(struct jump_entry *entry,
+				       enum jump_label_type type,
+				       int init)
 {
 	mutex_lock(&text_mutex);
-	__jump_label_transform(entry, type, 0);
+	__jump_label_transform(entry, type, init);
 	mutex_unlock(&text_mutex);
 }
 
-#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc))
-static struct text_poke_loc tp_vec[TP_VEC_MAX];
-static int tp_vec_nr;
+void arch_jump_label_transform(struct jump_entry *entry,
+			       enum jump_label_type type)
+{
+	jump_label_transform(entry, type, 0);
+}
 
 bool arch_jump_label_transform_queue(struct jump_entry *entry,
				     enum jump_label_type type)
 {
-	struct text_poke_loc *tp;
-	void *entry_code;
+	const void *opcode;
 
 	if (system_state == SYSTEM_BOOTING) {
 		/*
@@ -118,53 +112,19 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry,
 		return true;
 	}
 
-	/*
-	 * No more space in the vector, tell upper layer to apply
-	 * the queue before continuing.
-	 */
-	if (tp_vec_nr == TP_VEC_MAX)
-		return false;
-
-	tp = &tp_vec[tp_vec_nr];
-
-	entry_code = (void *)jump_entry_code(entry);
-
-	/*
-	 * The INT3 handler will do a bsearch in the queue, so we need entries
-	 * to be sorted. We can survive an unsorted list by rejecting the entry,
-	 * forcing the generic jump_label code to apply the queue. Warning once,
-	 * to raise the attention to the case of an unsorted entry that is
-	 * better not happen, because, in the worst case we will perform in the
-	 * same way as we do without batching - with some more overhead.
-	 */
-	if (tp_vec_nr > 0) {
-		int prev = tp_vec_nr - 1;
-		struct text_poke_loc *prev_tp = &tp_vec[prev];
-
-		if (WARN_ON_ONCE(prev_tp->addr > entry_code))
-			return false;
-	}
-
-	__jump_label_set_jump_code(entry, type,
-				   (union jump_code_union *)&tp->text, 0);
-
-	text_poke_loc_init(tp, entry_code, NULL, JUMP_LABEL_NOP_SIZE, NULL);
-
-	tp_vec_nr++;
-
+	mutex_lock(&text_mutex);
+	opcode = __jump_label_set_jump_code(entry, type, 0);
+	text_poke_queue((void *)jump_entry_code(entry),
+			opcode, JUMP_LABEL_NOP_SIZE, NULL);
+	mutex_unlock(&text_mutex);
 	return true;
 }
 
 void arch_jump_label_transform_apply(void)
 {
-	if (!tp_vec_nr)
-		return;
-
 	mutex_lock(&text_mutex);
-	text_poke_bp_batch(tp_vec, tp_vec_nr);
+	text_poke_finish();
 	mutex_unlock(&text_mutex);
-
-	tp_vec_nr = 0;
 }
 
 static enum {
@@ -193,5 +153,5 @@ __init_or_module void arch_jump_label_transform_static(struct jump_entry *entry,
 		jlstate = JL_STATE_NO_UPDATE;
 	}
 	if (jlstate == JL_STATE_UPDATE)
-		__jump_label_transform(entry, type, 1);
+		jump_label_transform(entry, type, 1);
 }
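A jump label site now simply flips between the 5-byte atomic NOP and a JMP.d32 produced by text_gen_insn(), with all queueing delegated to text_poke_queue(). For reference, a small user-space illustration of the two byte patterns such a site toggles between (the opcode values are the real x86 ones; the addresses are made up):

/* The two images a jump label site flips between; addresses are made up. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	uint8_t nop[5] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };	/* NOP_ATOMIC5 */
	uint8_t jmp[5] = { 0xe9, 0, 0, 0, 0 };			/* JMP32_INSN_OPCODE */
	uintptr_t code = 0x401000, target = 0x401080;
	int32_t disp = (int32_t)(target - (code + 5));

	memcpy(&jmp[1], &disp, sizeof(disp));
	printf("nop: %02x %02x %02x %02x %02x\n",
	       nop[0], nop[1], nop[2], nop[3], nop[4]);
	printf("jmp: %02x %02x %02x %02x %02x\n",
	       jmp[0], jmp[1], jmp[2], jmp[3], jmp[4]);
	return 0;
}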
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 4f13af7cbcdb..579d30e91a36 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -119,14 +119,14 @@ __synthesize_relative_insn(void *dest, void *from, void *to, u8 op)
 /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
 void synthesize_reljump(void *dest, void *from, void *to)
 {
-	__synthesize_relative_insn(dest, from, to, RELATIVEJUMP_OPCODE);
+	__synthesize_relative_insn(dest, from, to, JMP32_INSN_OPCODE);
 }
 NOKPROBE_SYMBOL(synthesize_reljump);
 
 /* Insert a call instruction at address 'from', which calls address 'to'.*/
 void synthesize_relcall(void *dest, void *from, void *to)
 {
-	__synthesize_relative_insn(dest, from, to, RELATIVECALL_OPCODE);
+	__synthesize_relative_insn(dest, from, to, CALL_INSN_OPCODE);
 }
 NOKPROBE_SYMBOL(synthesize_relcall);
 
@@ -301,7 +301,7 @@ static int can_probe(unsigned long paddr)
		 * Another debugging subsystem might insert this breakpoint.
		 * In that case, we can't recover it.
		 */
-		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
+		if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
 			return 0;
 		addr += insn.length;
 	}
@@ -356,7 +356,7 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
 		return 0;
 
 	/* Another subsystem puts a breakpoint, failed to recover */
-	if (insn->opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
+	if (insn->opcode.bytes[0] == INT3_INSN_OPCODE)
 		return 0;
 
 	/* We should not singlestep on the exception masking instructions */
@@ -400,14 +400,14 @@ static int prepare_boost(kprobe_opcode_t *buf, struct kprobe *p,
 	int len = insn->length;
 
 	if (can_boost(insn, p->addr) &&
-	    MAX_INSN_SIZE - len >= RELATIVEJUMP_SIZE) {
+	    MAX_INSN_SIZE - len >= JMP32_INSN_SIZE) {
 		/*
		 * These instructions can be executed directly if it
		 * jumps back to correct address.
		 */
 		synthesize_reljump(buf + len, p->ainsn.insn + len,
				   p->addr + insn->length);
-		len += RELATIVEJUMP_SIZE;
+		len += JMP32_INSN_SIZE;
 		p->ainsn.boostable = true;
 	} else {
 		p->ainsn.boostable = false;
@@ -501,12 +501,14 @@ int arch_prepare_kprobe(struct kprobe *p)
 
 void arch_arm_kprobe(struct kprobe *p)
 {
-	text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1);
+	text_poke(p->addr, ((unsigned char []){INT3_INSN_OPCODE}), 1);
+	text_poke_sync();
 }
 
 void arch_disarm_kprobe(struct kprobe *p)
 {
 	text_poke(p->addr, &p->opcode, 1);
+	text_poke_sync();
 }
 
 void arch_remove_kprobe(struct kprobe *p)
@@ -609,7 +611,7 @@ static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
 	regs->flags |= X86_EFLAGS_TF;
 	regs->flags &= ~X86_EFLAGS_IF;
 	/* single step inline if the instruction is an int3 */
-	if (p->opcode == BREAKPOINT_INSTRUCTION)
+	if (p->opcode == INT3_INSN_OPCODE)
 		regs->ip = (unsigned long)p->addr;
 	else
 		regs->ip = (unsigned long)p->ainsn.insn;
@@ -695,7 +697,7 @@ int kprobe_int3_handler(struct pt_regs *regs)
				reset_current_kprobe();
 			return 1;
 		}
-	} else if (*addr != BREAKPOINT_INSTRUCTION) {
+	} else if (*addr != INT3_INSN_OPCODE) {
 		/*
		 * The breakpoint instruction was removed right
		 * after we hit it. Another cpu has removed
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 8900329c28a7..3f45b5c43a71 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -38,7 +38,7 @@ unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
 	long offs;
 	int i;
 
-	for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
+	for (i = 0; i < JMP32_INSN_SIZE; i++) {
 		kp = get_kprobe((void *)addr - i);
 		/* This function only handles jump-optimized kprobe */
 		if (kp && kprobe_optimized(kp)) {
@@ -62,10 +62,10 @@ unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
found:
 	if (addr == (unsigned long)kp->addr) {
 		buf[0] = kp->opcode;
-		memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+		memcpy(buf + 1, op->optinsn.copied_insn, DISP32_SIZE);
 	} else {
 		offs = addr - (unsigned long)kp->addr - 1;
-		memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs);
+		memcpy(buf, op->optinsn.copied_insn + offs, DISP32_SIZE - offs);
 	}
 
 	return (unsigned long)buf;
@@ -141,8 +141,6 @@ STACK_FRAME_NON_STANDARD(optprobe_template_func);
 #define TMPL_END_IDX \
	((long)optprobe_template_end - (long)optprobe_template_entry)
 
-#define INT3_SIZE sizeof(kprobe_opcode_t)
-
 /* Optimized kprobe call back function: called from optinsn */
 static void
 optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
@@ -162,7 +160,7 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
 		regs->cs |= get_kernel_rpl();
 		regs->gs = 0;
 #endif
-		regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
+		regs->ip = (unsigned long)op->kp.addr + INT3_INSN_SIZE;
 		regs->orig_ax = ~0UL;
 
 		__this_cpu_write(current_kprobe, &op->kp);
@@ -179,7 +177,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real)
 	struct insn insn;
 	int len = 0, ret;
 
-	while (len < RELATIVEJUMP_SIZE) {
+	while (len < JMP32_INSN_SIZE) {
 		ret = __copy_instruction(dest + len, src + len, real + len, &insn);
 		if (!ret || !can_boost(&insn, src + len))
 			return -EINVAL;
@@ -271,7 +269,7 @@ static int can_optimize(unsigned long paddr)
 		return 0;
 
 	/* Check there is enough space for a relative jump. */
-	if (size - offset < RELATIVEJUMP_SIZE)
+	if (size - offset < JMP32_INSN_SIZE)
 		return 0;
 
 	/* Decode instructions */
@@ -290,15 +288,15 @@ static int can_optimize(unsigned long paddr)
 		kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
 		insn_get_length(&insn);
 		/* Another subsystem puts a breakpoint */
-		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
+		if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
 			return 0;
 		/* Recover address */
 		insn.kaddr = (void *)addr;
 		insn.next_byte = (void *)(addr + insn.length);
 		/* Check any instructions don't jump into target */
 		if (insn_is_indirect_jump(&insn) ||
-		    insn_jump_into_range(&insn, paddr + INT3_SIZE,
-					 RELATIVE_ADDR_SIZE))
+		    insn_jump_into_range(&insn, paddr + INT3_INSN_SIZE,
+					 DISP32_SIZE))
 			return 0;
 		addr += insn.length;
 	}
@@ -374,7 +372,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
	 * Verify if the address gap is in 2GB range, because this uses
	 * a relative jump.
	 */
-	rel = (long)slot - (long)op->kp.addr + RELATIVEJUMP_SIZE;
+	rel = (long)slot - (long)op->kp.addr + JMP32_INSN_SIZE;
 	if (abs(rel) > 0x7fffffff) {
 		ret = -ERANGE;
 		goto err;
@@ -401,7 +399,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
 	/* Set returning jmp instruction at the tail of out-of-line buffer */
 	synthesize_reljump(buf + len, slot + len,
			   (u8 *)op->kp.addr + op->optinsn.size);
-	len += RELATIVEJUMP_SIZE;
+	len += JMP32_INSN_SIZE;
 
 	/* We have to use text_poke() for instruction buffer because it is RO */
 	text_poke(slot, buf, len);
@@ -416,49 +414,50 @@ err:
 }
 
 /*
- * Replace breakpoints (int3) with relative jumps.
+ * Replace breakpoints (INT3) with relative jumps (JMP.d32).
 * Caller must call with locking kprobe_mutex and text_mutex.
+ *
+ * The caller will have installed a regular kprobe and after that issued
+ * synchronize_rcu_tasks(), this ensures that the instruction(s) that live in
+ * the 4 bytes after the INT3 are unused and can now be overwritten.
 */
 void arch_optimize_kprobes(struct list_head *oplist)
 {
 	struct optimized_kprobe *op, *tmp;
-	u8 insn_buff[RELATIVEJUMP_SIZE];
+	u8 insn_buff[JMP32_INSN_SIZE];
 
 	list_for_each_entry_safe(op, tmp, oplist, list) {
 		s32 rel = (s32)((long)op->optinsn.insn -
-			((long)op->kp.addr + RELATIVEJUMP_SIZE));
+			((long)op->kp.addr + JMP32_INSN_SIZE));
 
 		WARN_ON(kprobe_disabled(&op->kp));
 
 		/* Backup instructions which will be replaced by jump address */
-		memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
-		       RELATIVE_ADDR_SIZE);
+		memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_INSN_SIZE,
+		       DISP32_SIZE);
 
-		insn_buff[0] = RELATIVEJUMP_OPCODE;
+		insn_buff[0] = JMP32_INSN_OPCODE;
 		*(s32 *)(&insn_buff[1]) = rel;
 
-		text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, NULL);
+		text_poke_bp(op->kp.addr, insn_buff, JMP32_INSN_SIZE, NULL);
 
 		list_del_init(&op->list);
 	}
 }
 
-/* Replace a relative jump with a breakpoint (int3).  */
+/*
+ * Replace a relative jump (JMP.d32) with a breakpoint (INT3).
+ *
+ * After that, we can restore the 4 bytes after the INT3 to undo what
+ * arch_optimize_kprobes() scribbled. This is safe since those bytes will be
+ * unused once the INT3 lands.
+ */
 void arch_unoptimize_kprobe(struct optimized_kprobe *op)
 {
-	u8 insn_buff[RELATIVEJUMP_SIZE];
-	u8 emulate_buff[RELATIVEJUMP_SIZE];
-
-	/* Set int3 to first byte for kprobes */
-	insn_buff[0] = BREAKPOINT_INSTRUCTION;
-	memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
-
-	emulate_buff[0] = RELATIVEJUMP_OPCODE;
-	*(s32 *)(&emulate_buff[1]) = (s32)((long)op->optinsn.insn -
-			((long)op->kp.addr + RELATIVEJUMP_SIZE));
-
-	text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
-		     emulate_buff);
+	arch_arm_kprobe(&op->kp);
+	text_poke(op->kp.addr + INT3_INSN_SIZE,
		  op->optinsn.copied_insn, DISP32_SIZE);
+	text_poke_sync();
 }
 
 /*
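Note the ordering in the new arch_unoptimize_kprobe() above: it re-arms the INT3 first (arch_arm_kprobe() plus a sync) and only then restores the four displacement bytes, which no CPU can reach once the INT3 is globally visible, so the old emulate buffer is no longer needed. A user-space model of that sequence (illustrative only):

/* User-space model of the unoptimize ordering; illustrative only. */
#include <stdio.h>
#include <string.h>

#define INT3 0xCC

static unsigned char site[5] = { 0xE9, 0x10, 0x20, 0x30, 0x40 }; /* JMP.d32 */
static const unsigned char copied_insn[4] = { 0xAA, 0xBB, 0xCC, 0xDD };

static void sync_cores(void) { /* text_poke_sync() in the kernel */ }

static void unoptimize(void)
{
	site[0] = INT3;		/* arch_arm_kprobe(): site traps again */
	sync_cores();		/* no CPU can still be decoding the JMP */
	memcpy(site + 1, copied_insn, 4);	/* now-dead bytes, safe */
	sync_cores();
}

int main(void)
{
	unoptimize();
	for (int i = 0; i < 5; i++)
		printf("%02X ", site[i]);
	printf("\n");
	return 0;
}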
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 05da6b5b167b..f19de6f45d48 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -572,15 +572,6 @@ NOKPROBE_SYMBOL(do_general_protection);
 
 dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
 {
-#ifdef CONFIG_DYNAMIC_FTRACE
-	/*
-	 * ftrace must be first, everything else may cause a recursive crash.
-	 * See note by declaration of modifying_ftrace_code in ftrace.c
-	 */
-	if (unlikely(atomic_read(&modifying_ftrace_code)) &&
-	    ftrace_int3_handler(regs))
-		return;
-#endif
 	if (poke_int3_handler(regs))
 		return;
 
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 930edeb41ec3..e9f239e25bb0 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -874,34 +874,6 @@ void arch_remove_memory(int nid, u64 start, u64 size,
 
 int kernel_set_to_readonly __read_mostly;
 
-void set_kernel_text_rw(void)
-{
-	unsigned long start = PFN_ALIGN(_text);
-	unsigned long size = PFN_ALIGN(_etext) - start;
-
-	if (!kernel_set_to_readonly)
-		return;
-
-	pr_debug("Set kernel text: %lx - %lx for read write\n",
-		 start, start+size);
-
-	set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT);
-}
-
-void set_kernel_text_ro(void)
-{
-	unsigned long start = PFN_ALIGN(_text);
-	unsigned long size = PFN_ALIGN(_etext) - start;
-
-	if (!kernel_set_to_readonly)
-		return;
-
-	pr_debug("Set kernel text: %lx - %lx for read only\n",
-		 start, start+size);
-
-	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
-}
-
 static void mark_nxdata_nx(void)
 {
 	/*
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index dcb9bc961b39..b326f1440219 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1260,42 +1260,6 @@ void __init mem_init(void)
 
 int kernel_set_to_readonly;
 
-void set_kernel_text_rw(void)
-{
-	unsigned long start = PFN_ALIGN(_text);
-	unsigned long end = PFN_ALIGN(_etext);
-
-	if (!kernel_set_to_readonly)
-		return;
-
-	pr_debug("Set kernel text: %lx - %lx for read write\n",
-		 start, end);
-
-	/*
-	 * Make the kernel identity mapping for text RW. Kernel text
-	 * mapping will always be RO. Refer to the comment in
-	 * static_protections() in pageattr.c
-	 */
-	set_memory_rw(start, (end - start) >> PAGE_SHIFT);
-}
-
-void set_kernel_text_ro(void)
-{
-	unsigned long start = PFN_ALIGN(_text);
-	unsigned long end = PFN_ALIGN(_etext);
-
-	if (!kernel_set_to_readonly)
-		return;
-
-	pr_debug("Set kernel text: %lx - %lx for read only\n",
-		 start, end);
-
-	/*
-	 * Set the kernel identity mapping for text RO.
-	 */
-	set_memory_ro(start, (end - start) >> PAGE_SHIFT);
-}
-
 void mark_rodata_ro(void)
 {
 	unsigned long start = PFN_ALIGN(_text);
diff --git a/drivers/infiniband/hw/hfi1/trace_tid.h b/drivers/infiniband/hw/hfi1/trace_tid.h
index 343fb9894a82..985ffa9cc958 100644
--- a/drivers/infiniband/hw/hfi1/trace_tid.h
+++ b/drivers/infiniband/hw/hfi1/trace_tid.h
@@ -138,10 +138,10 @@ TRACE_EVENT(/* put_tid */
	TP_ARGS(dd, index, type, pa, order),
	TP_STRUCT__entry(/* entry */
		DD_DEV_ENTRY(dd)
-		__field(unsigned long, pa);
-		__field(u32, index);
-		__field(u32, type);
-		__field(u16, order);
+		__field(unsigned long, pa)
+		__field(u32, index)
+		__field(u32, type)
+		__field(u16, order)
	),
	TP_fast_assign(/* assign */
		DD_DEV_ASSIGN(dd);
diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h
index 09eb0c9ada00..769e5e4710c6 100644
--- a/drivers/infiniband/hw/hfi1/trace_tx.h
+++ b/drivers/infiniband/hw/hfi1/trace_tx.h
@@ -588,7 +588,7 @@ TRACE_EVENT(hfi1_sdma_user_reqinfo,
	TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 *i),
	TP_ARGS(dd, ctxt, subctxt, i),
	TP_STRUCT__entry(
-		DD_DEV_ENTRY(dd);
+		DD_DEV_ENTRY(dd)
		__field(u16, ctxt)
		__field(u8, subctxt)
		__field(u8, ver_opcode)
diff --git a/drivers/lightnvm/pblk-trace.h b/drivers/lightnvm/pblk-trace.h
index 9534503b69d9..47b67c6bff7a 100644
--- a/drivers/lightnvm/pblk-trace.h
+++ b/drivers/lightnvm/pblk-trace.h
@@ -46,7 +46,7 @@ TRACE_EVENT(pblk_chunk_reset,
	TP_STRUCT__entry(
		__string(name, name)
		__field(u64, ppa)
-		__field(int, state);
+		__field(int, state)
	),
 
	TP_fast_assign(
@@ -72,7 +72,7 @@ TRACE_EVENT(pblk_chunk_state,
	TP_STRUCT__entry(
		__string(name, name)
		__field(u64, ppa)
-		__field(int, state);
+		__field(int, state)
	),
 
	TP_fast_assign(
@@ -98,7 +98,7 @@ TRACE_EVENT(pblk_line_state,
	TP_STRUCT__entry(
		__string(name, name)
		__field(int, line)
-		__field(int, state);
+		__field(int, state)
	),
 
	TP_fast_assign(
@@ -121,7 +121,7 @@ TRACE_EVENT(pblk_state,
	TP_STRUCT__entry(
		__string(name, name)
-		__field(int, state);
+		__field(int, state)
	),
 
	TP_fast_assign(
diff --git a/drivers/net/fjes/fjes_trace.h b/drivers/net/fjes/fjes_trace.h
index c611b6a80b20..9237b69d8e21 100644
--- a/drivers/net/fjes/fjes_trace.h
+++ b/drivers/net/fjes/fjes_trace.h
@@ -28,7 +28,7 @@ TRACE_EVENT(fjes_hw_issue_request_command,
		__field(u8, cs_busy)
		__field(u8, cs_complete)
		__field(int, timeout)
-		__field(int, ret);
+		__field(int, ret)
	),
	TP_fast_assign(
		__entry->cr_req = cr->bits.req_code;
diff --git a/drivers/net/wireless/ath/ath10k/trace.h b/drivers/net/wireless/ath/ath10k/trace.h
index ab916459d237..842e42ec814f 100644
--- a/drivers/net/wireless/ath/ath10k/trace.h
+++ b/drivers/net/wireless/ath/ath10k/trace.h
@@ -239,7 +239,7 @@ TRACE_EVENT(ath10k_wmi_dbglog,
	TP_STRUCT__entry(
		__string(device, dev_name(ar->dev))
		__string(driver, dev_driver_string(ar->dev))
-		__field(u8, hw_type);
+		__field(u8, hw_type)
		__field(size_t, buf_len)
		__dynamic_array(u8, buf, buf_len)
	),
@@ -269,7 +269,7 @@ TRACE_EVENT(ath10k_htt_pktlog,
	TP_STRUCT__entry(
		__string(device, dev_name(ar->dev))
		__string(driver, dev_driver_string(ar->dev))
-		__field(u8, hw_type);
+		__field(u8, hw_type)
		__field(u16, buf_len)
		__dynamic_array(u8, pktlog, buf_len)
	),
@@ -435,7 +435,7 @@ TRACE_EVENT(ath10k_htt_rx_desc,
	TP_STRUCT__entry(
		__string(device, dev_name(ar->dev))
		__string(driver, dev_driver_string(ar->dev))
-		__field(u8, hw_type);
+		__field(u8, hw_type)
		__field(u16, len)
		__dynamic_array(u8, rxdesc, len)
	),
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 3362bae28b46..096203119934 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -329,7 +329,7 @@ TRACE_EVENT(xchk_btree_op_error,
		__field(int, level)
		__field(xfs_agnumber_t, agno)
		__field(xfs_agblock_t, bno)
-		__field(int, ptr);
+		__field(int, ptr)
		__field(int, error)
		__field(void *, ret_ip)
	),
@@ -414,7 +414,7 @@ TRACE_EVENT(xchk_btree_error,
		__field(int, level)
		__field(xfs_agnumber_t, agno)
		__field(xfs_agblock_t, bno)
-		__field(int, ptr);
+		__field(int, ptr)
		__field(void *, ret_ip)
	),
	TP_fast_assign(
@@ -452,7 +452,7 @@ TRACE_EVENT(xchk_ifork_btree_error,
		__field(int, level)
		__field(xfs_agnumber_t, agno)
		__field(xfs_agblock_t, bno)
-		__field(int, ptr);
+		__field(int, ptr)
		__field(void *, ret_ip)
	),
	TP_fast_assign(
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index a86be7f807ee..e242988f57fb 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -218,8 +218,8 @@ DECLARE_EVENT_CLASS(xfs_bmap_class,
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
-		__field(void *, leaf);
-		__field(int, pos);
+		__field(void *, leaf)
+		__field(int, pos)
		__field(xfs_fileoff_t, startoff)
		__field(xfs_fsblock_t, startblock)
		__field(xfs_filblks_t, blockcount)
diff --git a/include/linux/module.h b/include/linux/module.h
index bd165ba68617..0c7366c317bd 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -849,13 +849,9 @@ extern int module_sysfs_initialized;
 #define __MODULE_STRING(x) __stringify(x)
 
 #ifdef CONFIG_STRICT_MODULE_RWX
-extern void set_all_modules_text_rw(void);
-extern void set_all_modules_text_ro(void);
 extern void module_enable_ro(const struct module *mod, bool after_init);
 extern void module_disable_ro(const struct module *mod);
 #else
-static inline void set_all_modules_text_rw(void) { }
-static inline void set_all_modules_text_ro(void) { }
 static inline void module_enable_ro(const struct module *mod, bool after_init) { }
 static inline void module_disable_ro(const struct module *mod) { }
 #endif
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 4c6e15605766..13ea7f7d54ac 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -192,6 +192,22 @@ enum trace_reg {
 
 struct trace_event_call;
 
+#define TRACE_FUNCTION_TYPE ((const char *)~0UL)
+
+struct trace_event_fields {
+	const char *type;
+	union {
+		struct {
+			const char *name;
+			const int  size;
+			const int  align;
+			const int  is_signed;
+			const int  filter_type;
+		};
+		int (*define_fields)(struct trace_event_call *);
+	};
+};
+
 struct trace_event_class {
	const char *system;
	void *probe;
@@ -200,7 +216,7 @@ struct trace_event_class {
 #endif
	int (*reg)(struct trace_event_call *event,
		   enum trace_reg type, void *data);
-	int (*define_fields)(struct trace_event_call *);
+	struct trace_event_fields *fields_array;
	struct list_head *(*get_fields)(struct trace_event_call *);
	struct list_head fields;
	int (*raw_init)(struct trace_event_call *);
diff --git a/include/trace/events/filemap.h b/include/trace/events/filemap.h
index ee05db7ee8d2..796053e162d2 100644
--- a/include/trace/events/filemap.h
+++ b/include/trace/events/filemap.h
@@ -85,7 +85,7 @@ TRACE_EVENT(file_check_and_advance_wb_err,
	TP_ARGS(file, old),
 
	TP_STRUCT__entry(
-		__field(struct file *, file);
+		__field(struct file *, file)
		__field(unsigned long, i_ino)
		__field(dev_t, s_dev)
		__field(errseq_t, old)
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
index 472b33d23a10..96d77e5e0664 100644
--- a/include/trace/trace_events.h
+++ b/include/trace/trace_events.h
@@ -400,22 +400,16
@@ static struct trace_event_functions trace_event_type_funcs_##call = { \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #undef __field_ext -#define __field_ext(type, item, filter_type) \ - ret = trace_define_field(event_call, #type, #item, \ - offsetof(typeof(field), item), \ - sizeof(field.item), \ - is_signed_type(type), filter_type); \ - if (ret) \ - return ret; +#define __field_ext(_type, _item, _filter_type) { \ + .type = #_type, .name = #_item, \ + .size = sizeof(_type), .align = __alignof__(_type), \ + .is_signed = is_signed_type(_type), .filter_type = _filter_type }, #undef __field_struct_ext -#define __field_struct_ext(type, item, filter_type) \ - ret = trace_define_field(event_call, #type, #item, \ - offsetof(typeof(field), item), \ - sizeof(field.item), \ - 0, filter_type); \ - if (ret) \ - return ret; +#define __field_struct_ext(_type, _item, _filter_type) { \ + .type = #_type, .name = #_item, \ + .size = sizeof(_type), .align = __alignof__(_type), \ + 0, .filter_type = _filter_type }, #undef __field #define __field(type, item) __field_ext(type, item, FILTER_OTHER) @@ -424,25 +418,16 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \ #define __field_struct(type, item) __field_struct_ext(type, item, FILTER_OTHER) #undef __array -#define __array(type, item, len) \ - do { \ - char *type_str = #type"["__stringify(len)"]"; \ - BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ - BUILD_BUG_ON(len <= 0); \ - ret = trace_define_field(event_call, type_str, #item, \ - offsetof(typeof(field), item), \ - sizeof(field.item), \ - is_signed_type(type), FILTER_OTHER); \ - if (ret) \ - return ret; \ - } while (0); +#define __array(_type, _item, _len) { \ + .type = #_type"["__stringify(_len)"]", .name = #_item, \ + .size = sizeof(_type[_len]), .align = __alignof__(_type), \ + .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }, #undef __dynamic_array -#define __dynamic_array(type, item, len) \ - ret = trace_define_field(event_call, "__data_loc " #type "[]", #item, \ - offsetof(typeof(field), __data_loc_##item), \ - sizeof(field.__data_loc_##item), \ - is_signed_type(type), FILTER_OTHER); +#define __dynamic_array(_type, _item, _len) { \ + .type = "__data_loc " #_type "[]", .name = #_item, \ + .size = 4, .align = 4, \ + .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }, #undef __string #define __string(item, src) __dynamic_array(char, item, -1) @@ -452,16 +437,9 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \ #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ -static int notrace __init \ -trace_event_define_fields_##call(struct trace_event_call *event_call) \ -{ \ - struct trace_event_raw_##call field; \ - int ret; \ - \ - tstruct; \ - \ - return ret; \ -} +static struct trace_event_fields trace_event_fields_##call[] = { \ + tstruct \ + {} }; #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) @@ -619,7 +597,7 @@ static inline notrace int trace_event_get_offsets_##call( \ * * static struct trace_event_class __used event_class_<template> = { * .system = "<system>", - * .define_fields = trace_event_define_fields_<call>, + * .fields_array = trace_event_fields_<call>, * .fields = LIST_HEAD_INIT(event_class_##call.fields), * .raw_init = trace_event_raw_init, * .probe = trace_event_raw_event_##call, @@ -768,7 +746,7 @@ _TRACE_PERF_PROTO(call, PARAMS(proto)); \ static char print_fmt_##call[] = print; \ static struct trace_event_class __used __refdata event_class_##call = 
{ \ .system = TRACE_SYSTEM_STRING, \ - .define_fields = trace_event_define_fields_##call, \ + .fields_array = trace_event_fields_##call, \ .fields = LIST_HEAD_INIT(event_class_##call.fields),\ .raw_init = trace_event_raw_init, \ .probe = trace_event_raw_event_##call, \ diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 53534aa258a6..34e28b236d68 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -510,6 +510,8 @@ static void do_unoptimize_kprobes(void) arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list); /* Loop free_list for disarming */ list_for_each_entry_safe(op, tmp, &freeing_list, list) { + /* Switching from detour code to origin */ + op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; /* Disarm probes if marked disabled */ if (kprobe_disabled(&op->kp)) arch_disarm_kprobe(&op->kp); @@ -649,6 +651,7 @@ static void force_unoptimize_kprobe(struct optimized_kprobe *op) { lockdep_assert_cpus_held(); arch_unoptimize_kprobe(op); + op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; if (kprobe_disabled(&op->kp)) arch_disarm_kprobe(&op->kp); } @@ -676,7 +679,6 @@ static void unoptimize_kprobe(struct kprobe *p, bool force) return; } - op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; if (!list_empty(&op->list)) { /* Dequeue from the optimization queue */ list_del_init(&op->list); diff --git a/kernel/module.c b/kernel/module.c index b56f3224b161..ac058a5ad1d1 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2031,49 +2031,6 @@ static void module_enable_nx(const struct module *mod) frob_writable_data(&mod->init_layout, set_memory_nx); } -/* Iterate through all modules and set each module's text as RW */ -void set_all_modules_text_rw(void) -{ - struct module *mod; - - if (!rodata_enabled) - return; - - mutex_lock(&module_mutex); - list_for_each_entry_rcu(mod, &modules, list) { - if (mod->state == MODULE_STATE_UNFORMED) - continue; - - frob_text(&mod->core_layout, set_memory_rw); - frob_text(&mod->init_layout, set_memory_rw); - } - mutex_unlock(&module_mutex); -} - -/* Iterate through all modules and set each module's text as RO */ -void set_all_modules_text_ro(void) -{ - struct module *mod; - - if (!rodata_enabled) - return; - - mutex_lock(&module_mutex); - list_for_each_entry_rcu(mod, &modules, list) { - /* - * Ignore going modules since it's possible that ro - * protection has already been disabled, otherwise we'll - * run into protection faults at module deallocation. - */ - if (mod->state == MODULE_STATE_UNFORMED || - mod->state == MODULE_STATE_GOING) - continue; - - frob_text(&mod->core_layout, set_memory_ro); - frob_text(&mod->init_layout, set_memory_ro); - } - mutex_unlock(&module_mutex); -} #else /* !CONFIG_STRICT_MODULE_RWX */ static void module_enable_nx(const struct module *mod) { } #endif /* CONFIG_STRICT_MODULE_RWX */ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 63bf60f79398..a98dce1b3334 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -52,6 +52,9 @@ enum trace_type { #undef __field #define __field(type, item) type item; +#undef __field_fn +#define __field_fn(type, item) type item; + #undef __field_struct #define __field_struct(type, item) __field(type, item) @@ -71,26 +74,22 @@ enum trace_type { #define F_STRUCT(args...) 
args #undef FTRACE_ENTRY -#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \ +#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ struct struct_name { \ struct trace_entry ent; \ tstruct \ } #undef FTRACE_ENTRY_DUP -#define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk, filter) +#define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk) #undef FTRACE_ENTRY_REG -#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, \ - filter, regfn) \ - FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \ - filter) +#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, regfn) \ + FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) #undef FTRACE_ENTRY_PACKED -#define FTRACE_ENTRY_PACKED(name, struct_name, id, tstruct, print, \ - filter) \ - FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \ - filter) __packed +#define FTRACE_ENTRY_PACKED(name, struct_name, id, tstruct, print) \ + FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) __packed #include "trace_entries.h" @@ -1917,17 +1916,15 @@ extern void tracing_log_err(struct trace_array *tr, #define internal_trace_puts(str) __trace_puts(_THIS_IP_, str, strlen(str)) #undef FTRACE_ENTRY -#define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \ +#define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \ extern struct trace_event_call \ __aligned(4) event_##call; #undef FTRACE_ENTRY_DUP -#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print, filter) \ - FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print), \ - filter) +#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \ + FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) #undef FTRACE_ENTRY_PACKED -#define FTRACE_ENTRY_PACKED(call, struct_name, id, tstruct, print, filter) \ - FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print), \ - filter) +#define FTRACE_ENTRY_PACKED(call, struct_name, id, tstruct, print) \ + FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) #include "trace_entries.h" diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index fc8e97328e54..3e9d81608284 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -61,15 +61,13 @@ FTRACE_ENTRY_REG(function, ftrace_entry, TRACE_FN, F_STRUCT( - __field( unsigned long, ip ) - __field( unsigned long, parent_ip ) + __field_fn( unsigned long, ip ) + __field_fn( unsigned long, parent_ip ) ), F_printk(" %ps <-- %ps", (void *)__entry->ip, (void *)__entry->parent_ip), - FILTER_TRACE_FN, - perf_ftrace_event_register ); @@ -84,9 +82,7 @@ FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry, __field_desc( int, graph_ent, depth ) ), - F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth), - - FILTER_OTHER + F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth) ); /* Function return entry */ @@ -97,18 +93,16 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry, F_STRUCT( __field_struct( struct ftrace_graph_ret, ret ) __field_desc( unsigned long, ret, func ) + __field_desc( unsigned long, ret, overrun ) __field_desc( unsigned long long, ret, calltime) __field_desc( unsigned long long, ret, rettime ) - __field_desc( unsigned long, ret, overrun ) __field_desc( int, ret, depth ) ), F_printk("<-- %ps (%d) (start: %llx end: %llx) over: %d", (void *)__entry->func, __entry->depth, __entry->calltime, __entry->rettime, - __entry->depth), - - FILTER_OTHER + 
__entry->depth) ); /* @@ -137,9 +131,7 @@ FTRACE_ENTRY(context_switch, ctx_switch_entry, F_printk("%u:%u:%u ==> %u:%u:%u [%03u]", __entry->prev_pid, __entry->prev_prio, __entry->prev_state, __entry->next_pid, __entry->next_prio, __entry->next_state, - __entry->next_cpu), - - FILTER_OTHER + __entry->next_cpu) ); /* @@ -157,9 +149,7 @@ FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry, F_printk("%u:%u:%u ==+ %u:%u:%u [%03u]", __entry->prev_pid, __entry->prev_prio, __entry->prev_state, __entry->next_pid, __entry->next_prio, __entry->next_state, - __entry->next_cpu), - - FILTER_OTHER + __entry->next_cpu) ); /* @@ -183,9 +173,7 @@ FTRACE_ENTRY(kernel_stack, stack_entry, (void *)__entry->caller[0], (void *)__entry->caller[1], (void *)__entry->caller[2], (void *)__entry->caller[3], (void *)__entry->caller[4], (void *)__entry->caller[5], - (void *)__entry->caller[6], (void *)__entry->caller[7]), - - FILTER_OTHER + (void *)__entry->caller[6], (void *)__entry->caller[7]) ); FTRACE_ENTRY(user_stack, userstack_entry, @@ -203,9 +191,7 @@ FTRACE_ENTRY(user_stack, userstack_entry, (void *)__entry->caller[0], (void *)__entry->caller[1], (void *)__entry->caller[2], (void *)__entry->caller[3], (void *)__entry->caller[4], (void *)__entry->caller[5], - (void *)__entry->caller[6], (void *)__entry->caller[7]), - - FILTER_OTHER + (void *)__entry->caller[6], (void *)__entry->caller[7]) ); /* @@ -222,9 +208,7 @@ FTRACE_ENTRY(bprint, bprint_entry, ), F_printk("%ps: %s", - (void *)__entry->ip, __entry->fmt), - - FILTER_OTHER + (void *)__entry->ip, __entry->fmt) ); FTRACE_ENTRY_REG(print, print_entry, @@ -239,8 +223,6 @@ FTRACE_ENTRY_REG(print, print_entry, F_printk("%ps: %s", (void *)__entry->ip, __entry->buf), - FILTER_OTHER, - ftrace_event_register ); @@ -254,9 +236,7 @@ FTRACE_ENTRY(raw_data, raw_data_entry, ), F_printk("id:%04x %08x", - __entry->id, (int)__entry->buf[0]), - - FILTER_OTHER + __entry->id, (int)__entry->buf[0]) ); FTRACE_ENTRY(bputs, bputs_entry, @@ -269,9 +249,7 @@ FTRACE_ENTRY(bputs, bputs_entry, ), F_printk("%ps: %s", - (void *)__entry->ip, __entry->str), - - FILTER_OTHER + (void *)__entry->ip, __entry->str) ); FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw, @@ -283,16 +261,14 @@ FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw, __field_desc( resource_size_t, rw, phys ) __field_desc( unsigned long, rw, value ) __field_desc( unsigned long, rw, pc ) - __field_desc( int, rw, map_id ) + __field_desc( int, rw, map_id ) __field_desc( unsigned char, rw, opcode ) __field_desc( unsigned char, rw, width ) ), F_printk("%lx %lx %lx %d %x %x", (unsigned long)__entry->phys, __entry->value, __entry->pc, - __entry->map_id, __entry->opcode, __entry->width), - - FILTER_OTHER + __entry->map_id, __entry->opcode, __entry->width) ); FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map, @@ -304,15 +280,13 @@ FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map, __field_desc( resource_size_t, map, phys ) __field_desc( unsigned long, map, virt ) __field_desc( unsigned long, map, len ) - __field_desc( int, map, map_id ) + __field_desc( int, map, map_id ) __field_desc( unsigned char, map, opcode ) ), F_printk("%lx %lx %lx %d %x", (unsigned long)__entry->phys, __entry->virt, __entry->len, - __entry->map_id, __entry->opcode), - - FILTER_OTHER + __entry->map_id, __entry->opcode) ); @@ -334,9 +308,7 @@ FTRACE_ENTRY(branch, trace_branch, F_printk("%u:%s:%s (%u)%s", __entry->line, __entry->func, __entry->file, __entry->correct, - __entry->constant ? " CONSTANT" : ""), - - FILTER_OTHER + __entry->constant ? 
" CONSTANT" : "") ); @@ -362,7 +334,5 @@ FTRACE_ENTRY(hwlat, hwlat_entry, __entry->duration, __entry->outer_duration, __entry->nmi_total_ts, - __entry->nmi_count), - - FILTER_OTHER + __entry->nmi_count) ); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index a5b614cc3887..c8622a44d300 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -24,6 +24,7 @@ #include <linux/delay.h> #include <trace/events/sched.h> +#include <trace/syscall.h> #include <asm/setup.h> @@ -2017,7 +2018,24 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file) */ head = trace_get_fields(call); if (list_empty(head)) { - ret = call->class->define_fields(call); + struct trace_event_fields *field = call->class->fields_array; + unsigned int offset = sizeof(struct trace_entry); + + for (; field->type; field++) { + if (field->type == TRACE_FUNCTION_TYPE) { + ret = field->define_fields(call); + break; + } + + offset = ALIGN(offset, field->align); + ret = trace_define_field(call, field->type, field->name, + offset, field->size, + field->is_signed, field->filter_type); + if (ret) + break; + + offset += field->size; + } if (ret < 0) { pr_warn("Could not initialize trace point events/%s\n", name); diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index f62de5f43e79..ce376c78439f 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1154,6 +1154,12 @@ static struct synth_event *find_synth_event(const char *name) return NULL; } +static struct trace_event_fields synth_event_fields_array[] = { + { .type = TRACE_FUNCTION_TYPE, + .define_fields = synth_event_define_fields }, + {} +}; + static int register_synth_event(struct synth_event *event) { struct trace_event_call *call = &event->call; @@ -1175,7 +1181,7 @@ static int register_synth_event(struct synth_event *event) INIT_LIST_HEAD(&call->class->fields); call->event.funcs = &synth_event_funcs; - call->class->define_fields = synth_event_define_fields; + call->class->fields_array = synth_event_fields_array; ret = register_trace_event(&call->event); if (!ret) { diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 2e6d2e9741cc..77ce5a3b6773 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -29,10 +29,8 @@ static int ftrace_event_register(struct trace_event_call *call, * function and thus become accesible via perf. */ #undef FTRACE_ENTRY_REG -#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, \ - filter, regfn) \ - FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \ - filter) +#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, regfn) \ + FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) /* not needed for this file */ #undef __field_struct @@ -41,6 +39,9 @@ static int ftrace_event_register(struct trace_event_call *call, #undef __field #define __field(type, item) type item; +#undef __field_fn +#define __field_fn(type, item) type item; + #undef __field_desc #define __field_desc(type, container, item) type item; @@ -60,7 +61,7 @@ static int ftrace_event_register(struct trace_event_call *call, #define F_printk(fmt, args...) 
fmt, args #undef FTRACE_ENTRY -#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \ +#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ struct ____ftrace_##name { \ tstruct \ }; \ @@ -73,76 +74,46 @@ static void __always_unused ____ftrace_check_##name(void) \ } #undef FTRACE_ENTRY_DUP -#define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print, filter) \ - FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \ - filter) +#define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print) \ + FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) #include "trace_entries.h" +#undef __field_ext +#define __field_ext(_type, _item, _filter_type) { \ + .type = #_type, .name = #_item, \ + .size = sizeof(_type), .align = __alignof__(_type), \ + is_signed_type(_type), .filter_type = _filter_type }, + #undef __field -#define __field(type, item) \ - ret = trace_define_field(event_call, #type, #item, \ - offsetof(typeof(field), item), \ - sizeof(field.item), \ - is_signed_type(type), filter_type); \ - if (ret) \ - return ret; +#define __field(_type, _item) __field_ext(_type, _item, FILTER_OTHER) + +#undef __field_fn +#define __field_fn(_type, _item) __field_ext(_type, _item, FILTER_TRACE_FN) #undef __field_desc -#define __field_desc(type, container, item) \ - ret = trace_define_field(event_call, #type, #item, \ - offsetof(typeof(field), \ - container.item), \ - sizeof(field.container.item), \ - is_signed_type(type), filter_type); \ - if (ret) \ - return ret; +#define __field_desc(_type, _container, _item) __field_ext(_type, _item, FILTER_OTHER) #undef __array -#define __array(type, item, len) \ - do { \ - char *type_str = #type"["__stringify(len)"]"; \ - BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ - ret = trace_define_field(event_call, type_str, #item, \ - offsetof(typeof(field), item), \ - sizeof(field.item), \ - is_signed_type(type), filter_type); \ - if (ret) \ - return ret; \ - } while (0); +#define __array(_type, _item, _len) { \ + .type = #_type"["__stringify(_len)"]", .name = #_item, \ + .size = sizeof(_type[_len]), .align = __alignof__(_type), \ + is_signed_type(_type), .filter_type = FILTER_OTHER }, #undef __array_desc -#define __array_desc(type, container, item, len) \ - BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ - ret = trace_define_field(event_call, #type "[" #len "]", #item, \ - offsetof(typeof(field), \ - container.item), \ - sizeof(field.container.item), \ - is_signed_type(type), filter_type); \ - if (ret) \ - return ret; +#define __array_desc(_type, _container, _item, _len) __array(_type, _item, _len) #undef __dynamic_array -#define __dynamic_array(type, item) \ - ret = trace_define_field(event_call, #type "[]", #item, \ - offsetof(typeof(field), item), \ - 0, is_signed_type(type), filter_type);\ - if (ret) \ - return ret; +#define __dynamic_array(_type, _item) { \ + .type = #_type "[]", .name = #_item, \ + .size = 0, .align = __alignof__(_type), \ + is_signed_type(_type), .filter_type = FILTER_OTHER }, #undef FTRACE_ENTRY -#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \ -static int __init \ -ftrace_define_fields_##name(struct trace_event_call *event_call) \ -{ \ - struct struct_name field; \ - int ret; \ - int filter_type = filter; \ - \ - tstruct; \ - \ - return ret; \ -} +#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ +static struct trace_event_fields ftrace_event_fields_##name[] = { \ + tstruct \ + {} }; #include "trace_entries.h" @@ -152,6 +123,9 @@ ftrace_define_fields_##name(struct 
trace_event_call *event_call) \ #undef __field #define __field(type, item) +#undef __field_fn +#define __field_fn(type, item) + #undef __field_desc #define __field_desc(type, container, item) @@ -168,12 +142,10 @@ ftrace_define_fields_##name(struct trace_event_call *event_call) \ #define F_printk(fmt, args...) __stringify(fmt) ", " __stringify(args) #undef FTRACE_ENTRY_REG -#define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\ - regfn) \ - \ +#define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, regfn) \ static struct trace_event_class __refdata event_class_ftrace_##call = { \ .system = __stringify(TRACE_SYSTEM), \ - .define_fields = ftrace_define_fields_##call, \ + .fields_array = ftrace_event_fields_##call, \ .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\ .reg = regfn, \ }; \ @@ -191,9 +163,9 @@ static struct trace_event_call __used \ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call; #undef FTRACE_ENTRY -#define FTRACE_ENTRY(call, struct_name, etype, tstruct, print, filter) \ +#define FTRACE_ENTRY(call, struct_name, etype, tstruct, print) \ FTRACE_ENTRY_REG(call, struct_name, etype, \ - PARAMS(tstruct), PARAMS(print), filter, NULL) + PARAMS(tstruct), PARAMS(print), NULL) bool ftrace_event_is_function(struct trace_event_call *call) { diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 7f890262c8a3..cbdc4f4e64c7 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1555,16 +1555,28 @@ static struct trace_event_functions kprobe_funcs = { .trace = print_kprobe_event }; +static struct trace_event_fields kretprobe_fields_array[] = { + { .type = TRACE_FUNCTION_TYPE, + .define_fields = kretprobe_event_define_fields }, + {} +}; + +static struct trace_event_fields kprobe_fields_array[] = { + { .type = TRACE_FUNCTION_TYPE, + .define_fields = kprobe_event_define_fields }, + {} +}; + static inline void init_trace_event_call(struct trace_kprobe *tk) { struct trace_event_call *call = trace_probe_event_call(&tk->tp); if (trace_kprobe_is_return(tk)) { call->event.funcs = &kretprobe_funcs; - call->class->define_fields = kretprobe_event_define_fields; + call->class->fields_array = kretprobe_fields_array; } else { call->event.funcs = &kprobe_funcs; - call->class->define_fields = kprobe_event_define_fields; + call->class->fields_array = kprobe_fields_array; } call->flags = TRACE_EVENT_FL_KPROBE; diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 16fa218556fa..73140d80dd46 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -203,11 +203,10 @@ print_syscall_exit(struct trace_iterator *iter, int flags, extern char *__bad_type_size(void); -#define SYSCALL_FIELD(type, field, name) \ - sizeof(type) != sizeof(trace.field) ? 
\ - __bad_type_size() : \ - #type, #name, offsetof(typeof(trace), field), \ - sizeof(trace.field), is_signed_type(type) +#define SYSCALL_FIELD(_type, _name) { \ + .type = #_type, .name = #_name, \ + .size = sizeof(_type), .align = __alignof__(_type), \ + .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER } static int __init __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len) @@ -274,42 +273,22 @@ static int __init syscall_enter_define_fields(struct trace_event_call *call) { struct syscall_trace_enter trace; struct syscall_metadata *meta = call->data; - int ret; - int i; int offset = offsetof(typeof(trace), args); - - ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr), - FILTER_OTHER); - if (ret) - return ret; + int ret, i; for (i = 0; i < meta->nb_args; i++) { ret = trace_define_field(call, meta->types[i], meta->args[i], offset, sizeof(unsigned long), 0, FILTER_OTHER); + if (ret) + break; offset += sizeof(unsigned long); } return ret; } -static int __init syscall_exit_define_fields(struct trace_event_call *call) -{ - struct syscall_trace_exit trace; - int ret; - - ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr), - FILTER_OTHER); - if (ret) - return ret; - - ret = trace_define_field(call, SYSCALL_FIELD(long, ret, ret), - FILTER_OTHER); - - return ret; -} - static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) { struct trace_array *tr = data; @@ -507,6 +486,13 @@ static int __init init_syscall_trace(struct trace_event_call *call) return id; } +static struct trace_event_fields __refdata syscall_enter_fields_array[] = { + SYSCALL_FIELD(int, __syscall_nr), + { .type = TRACE_FUNCTION_TYPE, + .define_fields = syscall_enter_define_fields }, + {} +}; + struct trace_event_functions enter_syscall_print_funcs = { .trace = print_syscall_enter, }; @@ -518,7 +504,7 @@ struct trace_event_functions exit_syscall_print_funcs = { struct trace_event_class __refdata event_class_syscall_enter = { .system = "syscalls", .reg = syscall_enter_register, - .define_fields = syscall_enter_define_fields, + .fields_array = syscall_enter_fields_array, .get_fields = syscall_get_enter_fields, .raw_init = init_syscall_trace, }; @@ -526,7 +512,11 @@ struct trace_event_class __refdata event_class_syscall_enter = { struct trace_event_class __refdata event_class_syscall_exit = { .system = "syscalls", .reg = syscall_exit_register, - .define_fields = syscall_exit_define_fields, + .fields_array = (struct trace_event_fields[]){ + SYSCALL_FIELD(int, __syscall_nr), + SYSCALL_FIELD(long, ret), + {} + }, .fields = LIST_HEAD_INIT(event_class_syscall_exit.fields), .raw_init = init_syscall_trace, }; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 352073d36585..476a382f1f1b 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1507,12 +1507,17 @@ static struct trace_event_functions uprobe_funcs = { .trace = print_uprobe_event }; +static struct trace_event_fields uprobe_fields_array[] = { + { .type = TRACE_FUNCTION_TYPE, + .define_fields = uprobe_event_define_fields }, + {} +}; + static inline void init_trace_event_call(struct trace_uprobe *tu) { struct trace_event_call *call = trace_probe_event_call(&tu->tp); - call->event.funcs = &uprobe_funcs; - call->class->define_fields = uprobe_event_define_fields; + call->class->fields_array = uprobe_fields_array; call->flags = TRACE_EVENT_FL_UPROBE | TRACE_EVENT_FL_CAP_ANY; call->class->reg = trace_uprobe_register; diff --git a/net/mac80211/trace.h 
b/net/mac80211/trace.h index 4768322dc202..427f51a0a994 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -408,20 +408,20 @@ TRACE_EVENT(drv_bss_info_changed, __field(u32, basic_rates) __array(int, mcast_rate, NUM_NL80211_BANDS) __field(u16, ht_operation_mode) - __field(s32, cqm_rssi_thold); - __field(s32, cqm_rssi_hyst); - __field(u32, channel_width); - __field(u32, channel_cfreq1); + __field(s32, cqm_rssi_thold) + __field(s32, cqm_rssi_hyst) + __field(u32, channel_width) + __field(u32, channel_cfreq1) __dynamic_array(u32, arp_addr_list, info->arp_addr_cnt > IEEE80211_BSS_ARP_ADDR_LIST_LEN ? IEEE80211_BSS_ARP_ADDR_LIST_LEN : - info->arp_addr_cnt); - __field(int, arp_addr_cnt); - __field(bool, qos); - __field(bool, idle); - __field(bool, ps); - __dynamic_array(u8, ssid, info->ssid_len); - __field(bool, hidden_ssid); + info->arp_addr_cnt) + __field(int, arp_addr_cnt) + __field(bool, qos) + __field(bool, idle) + __field(bool, ps) + __dynamic_array(u8, ssid, info->ssid_len) + __field(bool, hidden_ssid) __field(int, txpower) __field(u8, p2p_oppps_ctwindow) ), @@ -1672,8 +1672,8 @@ TRACE_EVENT(drv_start_ap, VIF_ENTRY __field(u8, dtimper) __field(u16, bcnint) - __dynamic_array(u8, ssid, info->ssid_len); - __field(bool, hidden_ssid); + __dynamic_array(u8, ssid, info->ssid_len) + __field(bool, hidden_ssid) ), TP_fast_assign( @@ -1739,7 +1739,7 @@ TRACE_EVENT(drv_join_ibss, VIF_ENTRY __field(u8, dtimper) __field(u16, bcnint) - __dynamic_array(u8, ssid, info->ssid_len); + __dynamic_array(u8, ssid, info->ssid_len) ), TP_fast_assign( diff --git a/net/wireless/trace.h b/net/wireless/trace.h index d98ad2b3143b..d5eaa4928baa 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2009,7 +2009,7 @@ TRACE_EVENT(rdev_start_nan, WIPHY_ENTRY WDEV_ENTRY __field(u8, master_pref) - __field(u8, bands); + __field(u8, bands) ), TP_fast_assign( WIPHY_ASSIGN; @@ -2031,8 +2031,8 @@ TRACE_EVENT(rdev_nan_change_conf, WIPHY_ENTRY WDEV_ENTRY __field(u8, master_pref) - __field(u8, bands); - __field(u32, changes); + __field(u8, bands) + __field(u32, changes) ), TP_fast_assign( WIPHY_ASSIGN;
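
Reader's notes on the hunks above, with minimal sketches; every helper name invented below is illustrative, not part of the patch.

The central tracing change replaces the generated trace_event_define_fields_<call>() callbacks with static struct trace_event_fields tables. event_create_dir() now walks the table once, computing each field's offset from its size and alignment, and falls back to a callback only for the TRACE_FUNCTION_TYPE sentinel kept by synthetic, kprobe, uprobe and syscall-enter events, whose fields are only known at runtime. This is presumably also why the funcgraph_exit description reorders overrun ahead of calltime: with offsets computed by walking the table instead of taken with offsetof(), the declared order has to match the real layout of struct ftrace_graph_ret. A minimal restatement of that walk (ALIGN() and trace_define_field() are the kernel's own helpers; the function name is invented):

static int define_fields_from_array(struct trace_event_call *call)
{
	struct trace_event_fields *field = call->class->fields_array;
	unsigned int offset = sizeof(struct trace_entry);
	int ret = 0;

	for (; field->type; field++) {
		/* dynamic events keep a callback behind a sentinel type */
		if (field->type == TRACE_FUNCTION_TYPE)
			return field->define_fields(call);

		/* lay fields out exactly as the compiler would */
		offset = ALIGN(offset, field->align);
		ret = trace_define_field(call, field->type, field->name,
					 offset, field->size,
					 field->is_signed,
					 field->filter_type);
		if (ret)
			break;

		offset += field->size;
	}
	return ret;
}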
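
The many one-character hunks across the hfi1, pblk, fjes, ath10k, XFS, filemap, mac80211 and cfg80211 trace headers all drop stray semicolons after __field(), __dynamic_array() and DD_DEV_ENTRY() inside TP_STRUCT__entry(). Those semicolons used to be harmless because the tstruct body only ever expanded into struct member declarations; now the same body also expands into an initializer list, where a lone ';' no longer compiles. Hand-expanded illustration (type and array names hypothetical):

/* TP_STRUCT__entry( __field(int, state); )  -- note the stray ';' */

/* expansion as a struct (pre-existing): GCC tolerates the resulting
 * empty declaration, warning only under -Wpedantic */
struct trace_event_raw_example {
	struct trace_entry ent;
	int state;;
};

/* expansion as a field table (new with this series): the ';' lands
 * between array elements and is a hard syntax error */
static struct trace_event_fields trace_event_fields_example[] = {
	{ .type = "int", .name = "state",
	  .size = sizeof(int), .align = __alignof__(int),
	  .is_signed = is_signed_type(int), .filter_type = FILTER_OTHER },
	;	/* <-- would not compile */
	{}
};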
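
FTRACE_ENTRY() and its variants lose their filter argument because the filter type now travels with each field rather than with the whole entry: ordinary fields get FILTER_OTHER, while the function-trace entry marks ip and parent_ip with the new __field_fn(), which carries FILTER_TRACE_FN. The same __field_fn(unsigned long, ip) therefore expands two ways; hand-expanded below (note that the trace_export.c table entry leaves is_signed_type() as a positional initializer, which in C lands on .is_signed, the member that follows .align in struct trace_event_fields):

/* in kernel/trace/trace.h, building the entry struct: */
unsigned long ip;

/* in kernel/trace/trace_export.c, building the field table: */
{ .type = "unsigned long", .name = "ip",
  .size = sizeof(unsigned long), .align = __alignof__(unsigned long),
  is_signed_type(unsigned long), .filter_type = FILTER_TRACE_FN },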
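
On the syscall side, SYSCALL_FIELD() becomes a trace_event_fields initializer instead of an argument pack for trace_define_field(). The enter class still needs a TRACE_FUNCTION_TYPE tail entry because its arguments vary per syscall, but the exit class is fully static, so its table is defined in place as a C99 compound literal (legal at file scope, where it has static storage duration). The pattern in miniature, with an invented class name:

static struct trace_event_class sketch_class = {
	.system = "syscalls",
	.fields_array = (struct trace_event_fields[]){
		SYSCALL_FIELD(int,  __syscall_nr),
		SYSCALL_FIELD(long, ret),
		{}	/* NULL .type terminates the walk */
	},
};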
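
On the text-patching side, x86's arch_unoptimize_kprobe() no longer hand-builds int3 and jmp byte buffers for text_poke_bp(); it re-arms the probe through the common path and then restores the four displacement bytes the optimized jump had overwritten. With kernel text patching converging on this machinery, do_int3() also drops its special ftrace hook: every poke is expected to be mediated by poke_int3_handler(). For the same reason the set_kernel_text_rw()/set_all_modules_text_rw() helpers can go away entirely; text_poke() writes through a temporary mapping, so nothing legitimate needs kernel or module text writable in place anymore. The new unoptimize sequence, restated as a sketch with its byte-level effect (INT3_INSN_SIZE is 1, DISP32_SIZE is 4, per asm/text-patching.h):

void sketch_unoptimize(struct optimized_kprobe *op)
{
	/* e9 xx xx xx xx  ->  cc xx xx xx xx
	 * the probe now fires as a regular INT3 kprobe; the stale
	 * displacement bytes behind it are never decoded */
	arch_arm_kprobe(&op->kp);

	/* cc xx xx xx xx  ->  cc yy yy yy yy
	 * put the original instruction bytes back behind the INT3 */
	text_poke(op->kp.addr + INT3_INSN_SIZE,
		  op->optinsn.copied_insn, DISP32_SIZE);

	/* serialize so no CPU still decodes a half-edited instruction */
	text_poke_sync();
}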
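
Finally, in kernel/kprobes.c the clearing of KPROBE_FLAG_OPTIMIZED moves out of unoptimize_kprobe(), where the probe may merely be queued for unoptimization, and into do_unoptimize_kprobes() and force_unoptimize_kprobe(), the points where the detour code is actually gone. The flag, and hence kprobe_optimized(), now describes the state of the text rather than the intent, which matters because disarm decisions key off it. The ordering the patch establishes, as a comment-only sketch:

/* before: flag cleared while the jump could still be live
 *
 *   unoptimize_kprobe():      op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
 *   ...probe waits on unoptimizing_list, detour still patched in...
 *
 * after: flag cleared only once the text has been restored
 *
 *   do_unoptimize_kprobes():  arch_unoptimize_kprobes(...);
 *                             op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
 *   force_unoptimize_kprobe(): arch_unoptimize_kprobe(op);
 *                             op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
 */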