From 63fef14fc98a8b4fad777fd3bef4d068802b3f14 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 18 Aug 2017 17:24:00 +0900 Subject: kprobes/x86: Make insn buffer always ROX and use text_poke() Make insn buffer always ROX and use text_poke() to write the copied instructions instead of set_memory_*(). This makes instruction buffer stronger against other kernel subsystems because there is no window time to modify the buffer. Suggested-by: Ingo Molnar Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: David S . Miller Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/150304463032.17009.14195368040691676813.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/common.h | 6 ++-- arch/x86/kernel/kprobes/core.c | 61 ++++++++++++++++++++++++------------- arch/x86/kernel/kprobes/opt.c | 65 +++++++++++++++++++++++----------------- 3 files changed, 80 insertions(+), 52 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h index db2182d63ed0..e2c2a1970869 100644 --- a/arch/x86/kernel/kprobes/common.h +++ b/arch/x86/kernel/kprobes/common.h @@ -75,11 +75,11 @@ extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf, * Copy an instruction and adjust the displacement if the instruction * uses the %rip-relative addressing mode. 
*/ -extern int __copy_instruction(u8 *dest, u8 *src, struct insn *insn); +extern int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn); /* Generate a relative-jump/call instruction */ -extern void synthesize_reljump(void *from, void *to); -extern void synthesize_relcall(void *from, void *to); +extern void synthesize_reljump(void *dest, void *from, void *to); +extern void synthesize_relcall(void *dest, void *from, void *to); #ifdef CONFIG_OPTPROBES extern int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter); diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index f0153714ddac..b48e0efd668e 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -119,29 +119,29 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = { const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); static nokprobe_inline void -__synthesize_relative_insn(void *from, void *to, u8 op) +__synthesize_relative_insn(void *dest, void *from, void *to, u8 op) { struct __arch_relative_insn { u8 op; s32 raddr; } __packed *insn; - insn = (struct __arch_relative_insn *)from; + insn = (struct __arch_relative_insn *)dest; insn->raddr = (s32)((long)(to) - ((long)(from) + 5)); insn->op = op; } /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ -void synthesize_reljump(void *from, void *to) +void synthesize_reljump(void *dest, void *from, void *to) { - __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE); + __synthesize_relative_insn(dest, from, to, RELATIVEJUMP_OPCODE); } NOKPROBE_SYMBOL(synthesize_reljump); /* Insert a call instruction at address 'from', which calls address 'to'.*/ -void synthesize_relcall(void *from, void *to) +void synthesize_relcall(void *dest, void *from, void *to) { - __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE); + __synthesize_relative_insn(dest, from, to, RELATIVECALL_OPCODE); } NOKPROBE_SYMBOL(synthesize_relcall); @@ -346,10 
+346,11 @@ static int is_IF_modifier(kprobe_opcode_t *insn) /* * Copy an instruction with recovering modified instruction by kprobes * and adjust the displacement if the instruction uses the %rip-relative - * addressing mode. + * addressing mode. Note that since @real will be the final place of copied + * instruction, displacement must be adjust by @real, not @dest. * This returns the length of copied instruction, or 0 if it has an error. */ -int __copy_instruction(u8 *dest, u8 *src, struct insn *insn) +int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn) { kprobe_opcode_t buf[MAX_INSN_SIZE]; unsigned long recovered_insn = @@ -387,11 +388,11 @@ int __copy_instruction(u8 *dest, u8 *src, struct insn *insn) * have given. */ newdisp = (u8 *) src + (s64) insn->displacement.value - - (u8 *) dest; + - (u8 *) real; if ((s64) (s32) newdisp != newdisp) { pr_err("Kprobes error: new displacement does not fit into s32 (%llx)\n", newdisp); pr_err("\tSrc: %p, Dest: %p, old disp: %x\n", - src, dest, insn->displacement.value); + src, real, insn->displacement.value); return 0; } disp = (u8 *) dest + insn_offset_displacement(insn); @@ -402,20 +403,38 @@ int __copy_instruction(u8 *dest, u8 *src, struct insn *insn) } /* Prepare reljump right after instruction to boost */ -static void prepare_boost(struct kprobe *p, struct insn *insn) +static int prepare_boost(kprobe_opcode_t *buf, struct kprobe *p, + struct insn *insn) { + int len = insn->length; + if (can_boost(insn, p->addr) && - MAX_INSN_SIZE - insn->length >= RELATIVEJUMP_SIZE) { + MAX_INSN_SIZE - len >= RELATIVEJUMP_SIZE) { /* * These instructions can be executed directly if it * jumps back to correct address. 
*/ - synthesize_reljump(p->ainsn.insn + insn->length, + synthesize_reljump(buf + len, p->ainsn.insn + len, p->addr + insn->length); + len += RELATIVEJUMP_SIZE; p->ainsn.boostable = true; } else { p->ainsn.boostable = false; } + + return len; +} + +/* Make page to RO mode when allocate it */ +void *alloc_insn_page(void) +{ + void *page; + + page = module_alloc(PAGE_SIZE); + if (page) + set_memory_ro((unsigned long)page & PAGE_MASK, 1); + + return page; } /* Recover page to RW mode before releasing it */ @@ -429,12 +448,11 @@ void free_insn_page(void *page) static int arch_copy_kprobe(struct kprobe *p) { struct insn insn; + kprobe_opcode_t buf[MAX_INSN_SIZE]; int len; - set_memory_rw((unsigned long)p->ainsn.insn & PAGE_MASK, 1); - /* Copy an instruction with recovering if other optprobe modifies it.*/ - len = __copy_instruction(p->ainsn.insn, p->addr, &insn); + len = __copy_instruction(buf, p->addr, p->ainsn.insn, &insn); if (!len) return -EINVAL; @@ -442,15 +460,16 @@ static int arch_copy_kprobe(struct kprobe *p) * __copy_instruction can modify the displacement of the instruction, * but it doesn't affect boostable check. 
*/ - prepare_boost(p, &insn); - - set_memory_ro((unsigned long)p->ainsn.insn & PAGE_MASK, 1); + len = prepare_boost(buf, p, &insn); /* Check whether the instruction modifies Interrupt Flag or not */ - p->ainsn.if_modifier = is_IF_modifier(p->ainsn.insn); + p->ainsn.if_modifier = is_IF_modifier(buf); /* Also, displacement change doesn't affect the first byte */ - p->opcode = p->ainsn.insn[0]; + p->opcode = buf[0]; + + /* OK, write back the instruction(s) into ROX insn buffer */ + text_poke(p->ainsn.insn, buf, len); return 0; } diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 4f98aad38237..22e65f0b8b34 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -184,13 +184,13 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) } NOKPROBE_SYMBOL(optimized_callback); -static int copy_optimized_instructions(u8 *dest, u8 *src) +static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real) { struct insn insn; int len = 0, ret; while (len < RELATIVEJUMP_SIZE) { - ret = __copy_instruction(dest + len, src + len, &insn); + ret = __copy_instruction(dest + len, src + len, real, &insn); if (!ret || !can_boost(&insn, src + len)) return -EINVAL; len += ret; @@ -343,57 +343,66 @@ void arch_remove_optimized_kprobe(struct optimized_kprobe *op) int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *__unused) { - u8 *buf; - int ret; + u8 *buf = NULL, *slot; + int ret, len; long rel; if (!can_optimize((unsigned long)op->kp.addr)) return -EILSEQ; - op->optinsn.insn = get_optinsn_slot(); - if (!op->optinsn.insn) + buf = kzalloc(MAX_OPTINSN_SIZE, GFP_KERNEL); + if (!buf) return -ENOMEM; + op->optinsn.insn = slot = get_optinsn_slot(); + if (!slot) { + ret = -ENOMEM; + goto out; + } + /* * Verify if the address gap is in 2GB range, because this uses * a relative jump. 
*/ - rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE; + rel = (long)slot - (long)op->kp.addr + RELATIVEJUMP_SIZE; if (abs(rel) > 0x7fffffff) { - __arch_remove_optimized_kprobe(op, 0); - return -ERANGE; + ret = -ERANGE; + goto err; } - buf = (u8 *)op->optinsn.insn; - set_memory_rw((unsigned long)buf & PAGE_MASK, 1); + /* Copy arch-dep-instance from template */ + memcpy(buf, &optprobe_template_entry, TMPL_END_IDX); /* Copy instructions into the out-of-line buffer */ - ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr); - if (ret < 0) { - __arch_remove_optimized_kprobe(op, 0); - return ret; - } + ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr, + slot + TMPL_END_IDX); + if (ret < 0) + goto err; op->optinsn.size = ret; - - /* Copy arch-dep-instance from template */ - memcpy(buf, &optprobe_template_entry, TMPL_END_IDX); + len = TMPL_END_IDX + op->optinsn.size; /* Set probe information */ synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op); /* Set probe function call */ - synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback); + synthesize_relcall(buf + TMPL_CALL_IDX, + slot + TMPL_CALL_IDX, optimized_callback); /* Set returning jmp instruction at the tail of out-of-line buffer */ - synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size, + synthesize_reljump(buf + len, slot + len, (u8 *)op->kp.addr + op->optinsn.size); - - set_memory_ro((unsigned long)buf & PAGE_MASK, 1); - - flush_icache_range((unsigned long) buf, - (unsigned long) buf + TMPL_END_IDX + - op->optinsn.size + RELATIVEJUMP_SIZE); - return 0; + len += RELATIVEJUMP_SIZE; + + /* We have to use text_poke for instuction buffer because it is RO */ + text_poke(slot, buf, len); + ret = 0; +out: + kfree(buf); + return ret; + +err: + __arch_remove_optimized_kprobe(op, 0); + goto out; } /* -- cgit v1.2.3 From a8976fc84b644e3b567ea2bafad3b53b21ed6b6c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 18 Aug 2017 17:25:08 +0900 Subject: 
kprobes/x86: Remove addressof() operators The following commit: 54a7d50b9205 ("x86: mark kprobe templates as character arrays, not single characters") changed optprobe_template_* to arrays, so we can remove the addressof() operators from those symbols. Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: David S . Miller Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/150304469798.17009.15886717935027472863.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/include/asm/kprobes.h | 4 ++-- arch/x86/kernel/kprobes/opt.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 6cf65437b5e5..9f2e3102e0bb 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -58,8 +58,8 @@ extern __visible kprobe_opcode_t optprobe_template_call[]; extern __visible kprobe_opcode_t optprobe_template_end[]; #define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE) #define MAX_OPTINSN_SIZE \ - (((unsigned long)&optprobe_template_end - \ - (unsigned long)&optprobe_template_entry) + \ + (((unsigned long)optprobe_template_end - \ + (unsigned long)optprobe_template_entry) + \ MAX_OPTIMIZED_LENGTH + RELATIVEJUMP_SIZE) extern const int kretprobe_blacklist_size; diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 22e65f0b8b34..0cae7c0f32ec 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -142,11 +142,11 @@ void optprobe_template_func(void); STACK_FRAME_NON_STANDARD(optprobe_template_func); #define TMPL_MOVE_IDX \ - ((long)&optprobe_template_val - (long)&optprobe_template_entry) + ((long)optprobe_template_val - (long)optprobe_template_entry) #define TMPL_CALL_IDX \ - ((long)&optprobe_template_call - (long)&optprobe_template_entry) + ((long)optprobe_template_call - (long)optprobe_template_entry) #define TMPL_END_IDX 
\ - ((long)&optprobe_template_end - (long)&optprobe_template_entry) + ((long)optprobe_template_end - (long)optprobe_template_entry) #define INT3_SIZE sizeof(kprobe_opcode_t) @@ -371,7 +371,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, } /* Copy arch-dep-instance from template */ - memcpy(buf, &optprobe_template_entry, TMPL_END_IDX); + memcpy(buf, optprobe_template_entry, TMPL_END_IDX); /* Copy instructions into the out-of-line buffer */ ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr, -- cgit v1.2.3 From cd52edad55fbcd8064877a77d31445b2fb4b85c3 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 19 Sep 2017 18:59:39 +0900 Subject: kprobes/x86: Move the get_kprobe_ctlblk() into irq-disabled block Since get_kprobe_ctlblk() accesses per-cpu variables which calls smp_processor_id(), it must be called under preempt-disabled or irq-disabled. Signed-off-by: Masami Hiramatsu Cc: Alexei Starovoitov Cc: Alexei Starovoitov Cc: Ananth N Mavinakayanahalli Cc: Linus Torvalds Cc: Paul E . 
McKenney Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/150581517952.32348.2655896843219158446.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/opt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 0cae7c0f32ec..f55810305f9a 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -154,7 +154,6 @@ STACK_FRAME_NON_STANDARD(optprobe_template_func); static void optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) { - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); unsigned long flags; /* This is possible if op is under delayed unoptimizing */ @@ -165,6 +164,7 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) if (kprobe_running()) { kprobes_inc_nmissed_count(&op->kp); } else { + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); /* Save skipped registers */ #ifdef CONFIG_X86_64 regs->cs = __KERNEL_CS; -- cgit v1.2.3 From 9a09f261a4fa52de916b0db34a36956c95f78fdc Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 19 Sep 2017 19:00:59 +0900 Subject: kprobes/x86: Disable preemption in optprobe Disable preemption in optprobe handler as described in Documentation/kprobes.txt, which says: "Probe handlers are run with preemption disabled." Signed-off-by: Masami Hiramatsu Cc: Alexei Starovoitov Cc: Alexei Starovoitov Cc: Ananth N Mavinakayanahalli Cc: Linus Torvalds Cc: Paul E . 
McKenney Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/150581525942.32348.6359217983269060829.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/opt.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index f55810305f9a..32c35cb3550c 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -161,6 +161,7 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) return; local_irq_save(flags); + preempt_disable(); if (kprobe_running()) { kprobes_inc_nmissed_count(&op->kp); } else { @@ -180,6 +181,7 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) opt_pre_handler(&op->kp, regs); __this_cpu_write(current_kprobe, NULL); } + preempt_enable_no_resched(); local_irq_restore(flags); } NOKPROBE_SYMBOL(optimized_callback); -- cgit v1.2.3 From 5bb4fc2d8641219732eb2bb654206775a4219aca Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 19 Sep 2017 19:01:40 +0900 Subject: kprobes/x86: Disable preemption in ftrace-based jprobes Disable preemption in ftrace-based jprobe handlers as described in Documentation/kprobes.txt: "Probe handlers are run with preemption disabled." This will fix jprobes behavior when CONFIG_PREEMPT=y. Signed-off-by: Masami Hiramatsu Cc: Alexei Starovoitov Cc: Alexei Starovoitov Cc: Ananth N Mavinakayanahalli Cc: Linus Torvalds Cc: Paul E . 
McKenney Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/150581530024.32348.9863783558598926771.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/ftrace.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 041f7b6dfa0f..bcfee4f69b0e 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -26,7 +26,7 @@ #include "common.h" static nokprobe_inline -int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, +void __skip_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, unsigned long orig_ip) { /* @@ -41,20 +41,21 @@ int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, __this_cpu_write(current_kprobe, NULL); if (orig_ip) regs->ip = orig_ip; - return 1; } int skip_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { - if (kprobe_ftrace(p)) - return __skip_singlestep(p, regs, kcb, 0); - else - return 0; + if (kprobe_ftrace(p)) { + __skip_singlestep(p, regs, kcb, 0); + preempt_enable_no_resched(); + return 1; + } + return 0; } NOKPROBE_SYMBOL(skip_singlestep); -/* Ftrace callback handler for kprobes */ +/* Ftrace callback handler for kprobes -- called under preempt disabled */ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct pt_regs *regs) { @@ -77,13 +78,17 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */ regs->ip = ip + sizeof(kprobe_opcode_t); + /* To emulate trap based kprobes, preempt_disable here */ + preempt_disable(); __this_cpu_write(current_kprobe, p); kcb->kprobe_status = KPROBE_HIT_ACTIVE; - if (!p->pre_handler || !p->pre_handler(p, regs)) + if (!p->pre_handler || !p->pre_handler(p, regs)) { __skip_singlestep(p, regs, kcb, orig_ip); +
preempt_enable_no_resched(); + } /* * If pre_handler returns !0, it sets regs->ip and - * resets current kprobe. + * resets current kprobe, and keep preempt count +1. */ } end: -- cgit v1.2.3 From a19b2e3d783964d48d2b494439648e929bcdc976 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 19 Sep 2017 19:02:20 +0900 Subject: kprobes/x86: Remove IRQ disabling from ftrace-based/optimized kprobes Kprobes don't need to disable IRQs if they are called from the ftrace/jump trampoline code, because Documentation/kprobes.txt says: ----- Probe handlers are run with preemption disabled. Depending on the architecture and optimization state, handlers may also run with interrupts disabled (e.g., kretprobe handlers and optimized kprobe handlers run without interrupt disabled on x86/x86-64). ----- So let's remove IRQ disabling from those handlers. Signed-off-by: Masami Hiramatsu Cc: Alexei Starovoitov Cc: Alexei Starovoitov Cc: Ananth N Mavinakayanahalli Cc: Linus Torvalds Cc: Paul E . McKenney Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/150581534039.32348.11331736206004264553.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/ftrace.c | 9 ++------- arch/x86/kernel/kprobes/opt.c | 4 ---- 2 files changed, 2 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index bcfee4f69b0e..8dc0161cec8f 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -61,14 +61,11 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, { struct kprobe *p; struct kprobe_ctlblk *kcb; - unsigned long flags; - - /* Disable irq for emulating a breakpoint and avoiding preempt */ - local_irq_save(flags); + /* Preempt is disabled by ftrace */ p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) - goto end; + return; kcb = get_kprobe_ctlblk(); if (kprobe_running()) { @@ -91,8 +88,6 @@ void
kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, * resets current kprobe, and keep preempt count +1. */ } -end: - local_irq_restore(flags); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 32c35cb3550c..e941136e24d8 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -154,13 +154,10 @@ STACK_FRAME_NON_STANDARD(optprobe_template_func); static void optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) { - unsigned long flags; - /* This is possible if op is under delayed unoptimizing */ if (kprobe_disabled(&op->kp)) return; - local_irq_save(flags); preempt_disable(); if (kprobe_running()) { kprobes_inc_nmissed_count(&op->kp); @@ -182,7 +179,6 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) __this_cpu_write(current_kprobe, NULL); } preempt_enable_no_resched(); - local_irq_restore(flags); } NOKPROBE_SYMBOL(optimized_callback); -- cgit v1.2.3 From a47ba4d77e1236d214e5116b5631bc4c2d6e6369 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 31 Aug 2017 14:46:30 -0700 Subject: perf/x86: Enable free running PEBS for REGS_USER/INTR Currently free running PEBS is disabled when user or interrupt registers are requested. Most of the registers are actually available in the PEBS record and can be supported. So we just need to check for the supported registers and then allow it: it is all except for the segment register. For user registers this only works when the counter is limited to ring 3 only, so this also needs to be checked. 
Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170831214630.21892-1-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 4 ++++ arch/x86/events/perf_event.h | 24 +++++++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 829e89cfcee2..cfd91a03304d 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2958,6 +2958,10 @@ static unsigned long intel_pmu_free_running_flags(struct perf_event *event) if (event->attr.use_clockid) flags &= ~PERF_SAMPLE_TIME; + if (!event->attr.exclude_kernel) + flags &= ~PERF_SAMPLE_REGS_USER; + if (event->attr.sample_regs_user & ~PEBS_REGS) + flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR); return flags; } diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 4196f81ec0e1..f7aaadf9331f 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -85,13 +85,15 @@ struct amd_nb { * Flags PEBS can handle without an PMI. * * TID can only be handled by flushing at context switch. + * REGS_USER can be handled for events limited to ring 3. * */ #define PEBS_FREERUNNING_FLAGS \ (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \ PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \ PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \ - PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR) + PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \ + PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER) /* * A debug store configuration. 
@@ -110,6 +112,26 @@ struct debug_store { u64 pebs_event_reset[MAX_PEBS_EVENTS]; }; +#define PEBS_REGS \ + (PERF_REG_X86_AX | \ + PERF_REG_X86_BX | \ + PERF_REG_X86_CX | \ + PERF_REG_X86_DX | \ + PERF_REG_X86_DI | \ + PERF_REG_X86_SI | \ + PERF_REG_X86_SP | \ + PERF_REG_X86_BP | \ + PERF_REG_X86_IP | \ + PERF_REG_X86_FLAGS | \ + PERF_REG_X86_R8 | \ + PERF_REG_X86_R9 | \ + PERF_REG_X86_R10 | \ + PERF_REG_X86_R11 | \ + PERF_REG_X86_R12 | \ + PERF_REG_X86_R13 | \ + PERF_REG_X86_R14 | \ + PERF_REG_X86_R15) + /* * Per register state. */ -- cgit v1.2.3 From a30b85df7d599f626973e9cd3056fe755bd778e0 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 20 Oct 2017 08:43:39 +0900 Subject: kprobes: Use synchronize_rcu_tasks() for optprobe with CONFIG_PREEMPT=y We want to wait for all potentially preempted kprobes trampoline execution to have completed. This guarantees that any freed trampoline memory is not in use by any task in the system anymore. synchronize_rcu_tasks() gives such a guarantee, so use it. Also, this guarantees to wait for all potentially preempted tasks on the instructions which will be replaced with a jump. Since this becomes a problem only when CONFIG_PREEMPT=y, enable CONFIG_TASKS_RCU=y for synchronize_rcu_tasks() in that case. Signed-off-by: Masami Hiramatsu Acked-by: Paul E. McKenney Cc: Ananth N Mavinakayanahalli Cc: Linus Torvalds Cc: Naveen N . Rao Cc: Paul E . 
McKenney Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/150845661962.5443.17724352636247312231.stgit@devbox Signed-off-by: Ingo Molnar --- arch/Kconfig | 2 +- kernel/kprobes.c | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index 1aafb4efbb51..f75c8e8a229b 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -90,7 +90,7 @@ config STATIC_KEYS_SELFTEST config OPTPROBES def_bool y depends on KPROBES && HAVE_OPTPROBES - depends on !PREEMPT + select TASKS_RCU if PREEMPT config KPROBES_ON_FTRACE def_bool y diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 15fba7fe57c8..a8fc1492b308 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -573,13 +573,15 @@ static void kprobe_optimizer(struct work_struct *work) do_unoptimize_kprobes(); /* - * Step 2: Wait for quiesence period to ensure all running interrupts - * are done. Because optprobe may modify multiple instructions - * there is a chance that Nth instruction is interrupted. In that - * case, running interrupt can return to 2nd-Nth byte of jump - * instruction. This wait is for avoiding it. + * Step 2: Wait for quiescence period to ensure all potentially + * preempted tasks to have normally scheduled. Because optprobe + * may modify multiple instructions, there is a chance that Nth + * instruction is preempted. In that case, such tasks can return + * to 2nd-Nth byte of jump instruction. This wait is for avoiding it. + * Note that on non-preemptive kernel, this is transparently converted + * to synchronize_sched() to wait for all interrupts to have completed.
*/ - synchronize_sched(); + synchronize_rcu_tasks(); /* Step 3: Optimize kprobes after quiesence period */ do_optimize_kprobes(); -- cgit v1.2.3 From 4650209b166789182657c8eb0612cecd5b54d591 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 4 Nov 2017 13:30:52 +0900 Subject: arm/kprobes: Fix kretprobe test to check correct counter test_kretprobe() uses jprobe_func_called at the last test, but it must check kretprobe_handler_called. Signed-off-by: Masami Hiramatsu Cc: Arnd Bergmann Cc: Jon Medhurst Cc: Linus Torvalds Cc: Mark Brown Cc: Peter Zijlstra Cc: Russell King Cc: Stephen Rothwell Cc: Thomas Gleixner Cc: Wang Nan Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/150976985182.2012.15495311380682779381.stgit@devbox Signed-off-by: Ingo Molnar --- arch/arm/probes/kprobes/test-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/probes/kprobes/test-core.c b/arch/arm/probes/kprobes/test-core.c index 1c98a87786ca..9c3ceba69015 100644 --- a/arch/arm/probes/kprobes/test-core.c +++ b/arch/arm/probes/kprobes/test-core.c @@ -451,7 +451,7 @@ static int test_kretprobe(long (*func)(long, long)) } if (!call_test_func(func, false)) return -EINVAL; - if (jprobe_func_called == test_func_instance) { + if (kretprobe_handler_called == test_func_instance) { pr_err("FAIL: kretprobe called after unregistering\n"); return -EINVAL; } -- cgit v1.2.3 From a443026a48ad7a8b1b966b00fb5d7111b81a219b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 4 Nov 2017 13:31:21 +0900 Subject: arm/kprobes: Remove jprobe test case Remove the jprobes test case because jprobes is a deprecated feature. 
Signed-off-by: Masami Hiramatsu Cc: Arnd Bergmann Cc: Jon Medhurst Cc: Linus Torvalds Cc: Mark Brown Cc: Peter Zijlstra Cc: Russell King Cc: Stephen Rothwell Cc: Thomas Gleixner Cc: Wang Nan Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/150976988105.2012.13618117383683725047.stgit@devbox Signed-off-by: Ingo Molnar --- arch/arm/probes/kprobes/test-core.c | 57 ------------------------------------- 1 file changed, 57 deletions(-) (limited to 'arch') diff --git a/arch/arm/probes/kprobes/test-core.c b/arch/arm/probes/kprobes/test-core.c index 9c3ceba69015..9ed0129bed3c 100644 --- a/arch/arm/probes/kprobes/test-core.c +++ b/arch/arm/probes/kprobes/test-core.c @@ -227,7 +227,6 @@ static bool test_regs_ok; static int test_func_instance; static int pre_handler_called; static int post_handler_called; -static int jprobe_func_called; static int kretprobe_handler_called; static int tests_failed; @@ -370,50 +369,6 @@ static int test_kprobe(long (*func)(long, long)) return 0; } -static void __kprobes jprobe_func(long r0, long r1) -{ - jprobe_func_called = test_func_instance; - if (r0 == FUNC_ARG1 && r1 == FUNC_ARG2) - test_regs_ok = true; - jprobe_return(); -} - -static struct jprobe the_jprobe = { - .entry = jprobe_func, -}; - -static int test_jprobe(long (*func)(long, long)) -{ - int ret; - - the_jprobe.kp.addr = (kprobe_opcode_t *)func; - ret = register_jprobe(&the_jprobe); - if (ret < 0) { - pr_err("FAIL: register_jprobe failed with %d\n", ret); - return ret; - } - - ret = call_test_func(func, true); - - unregister_jprobe(&the_jprobe); - the_jprobe.kp.flags = 0; /* Clear disable flag to allow reuse */ - - if (!ret) - return -EINVAL; - if (jprobe_func_called != test_func_instance) { - pr_err("FAIL: jprobe handler function not called\n"); - return -EINVAL; - } - if (!call_test_func(func, false)) - return -EINVAL; - if (jprobe_func_called == test_func_instance) { - pr_err("FAIL: probe called after unregistering\n"); - return -EINVAL; - } - - return 0; 
-} - static int __kprobes kretprobe_handler(struct kretprobe_instance *ri, struct pt_regs *regs) { @@ -468,18 +423,6 @@ static int run_api_tests(long (*func)(long, long)) if (ret < 0) return ret; - pr_info(" jprobe\n"); - ret = test_jprobe(func); -#if defined(CONFIG_THUMB2_KERNEL) && !defined(MODULE) - if (ret == -EINVAL) { - pr_err("FAIL: Known longtime bug with jprobe on Thumb kernels\n"); - tests_failed = ret; - ret = 0; - } -#endif - if (ret < 0) - return ret; - pr_info(" kretprobe\n"); ret = test_kretprobe(func); if (ret < 0) -- cgit v1.2.3