From f281769e81b49840f1857f6dfac049350e678350 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 17 Mar 2013 18:54:44 +0100 Subject: uprobes: Use file_inode() Cleanup. Now that we have f_inode/file_inode() we can use it instead of vm_file->f_mapping->host. This should not make any difference for uprobes, but in theory this change is more correct. We use this inode as a key, to compare it with uprobe->inode set by uprobe_register(inode), and the caller uses d_inode. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index a567c8c7ef31..26bc2e24e9e3 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -758,7 +758,7 @@ register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new) down_write(&mm->mmap_sem); vma = find_vma(mm, info->vaddr); if (!vma || !valid_vma(vma, is_register) || - vma->vm_file->f_mapping->host != uprobe->inode) + file_inode(vma->vm_file) != uprobe->inode) goto unlock; if (vma->vm_start > info->vaddr || @@ -917,7 +917,7 @@ static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm) loff_t offset; if (!valid_vma(vma, false) || - vma->vm_file->f_mapping->host != uprobe->inode) + file_inode(vma->vm_file) != uprobe->inode) continue; offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT; @@ -1010,7 +1010,7 @@ int uprobe_mmap(struct vm_area_struct *vma) if (no_uprobe_events() || !valid_vma(vma, true)) return 0; - inode = vma->vm_file->f_mapping->host; + inode = file_inode(vma->vm_file); if (!inode) return 0; @@ -1041,7 +1041,7 @@ vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long e struct inode *inode; struct rb_node *n; - inode = vma->vm_file->f_mapping->host; + inode = file_inode(vma->vm_file); min = vaddr_to_offset(vma, start); max = min + (end - start) - 1; @@ -1465,7 +1465,7 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) vma = find_vma(mm, bp_vaddr); if (vma && vma->vm_start <= bp_vaddr) { if (valid_vma(vma, false)) { - struct inode *inode = vma->vm_file->f_mapping->host; + struct inode *inode = file_inode(vma->vm_file); loff_t offset = vaddr_to_offset(vma, bp_vaddr); uprobe = find_uprobe(inode, offset); -- cgit v1.2.3 From 0908ad6e56b5a6e86745680bc324bdbfac64d0b6 Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Fri, 22 Mar 2013 20:46:27 +0530 Subject: uprobes: Add trap variant helper Some architectures like powerpc have multiple variants of the trap instruction. Introduce an additional helper is_trap_insn() for run-time handling of non-uprobe traps on such architectures. While there, change is_swbp_at_addr() to is_trap_at_addr() for reading clarity. With this change, the uprobe registration path will supercede any trap instruction inserted at the requested location, while taking care of delivering the SIGTRAP for cases where the trap notification came in for an address without a uprobe. See [1] for a more detailed explanation. [1] https://lists.ozlabs.org/pipermail/linuxppc-dev/2013-March/104771.html This change was suggested by Oleg Nesterov. Signed-off-by: Ananth N Mavinakayanahalli Acked-by: Srikar Dronamraju Signed-off-by: Oleg Nesterov --- include/linux/uprobes.h | 1 + kernel/events/uprobes.c | 34 +++++++++++++++++++++++++++++----- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 02b83db8e2c5..19612881399a 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -100,6 +100,7 @@ struct uprobes_state { extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); +extern bool __weak is_trap_insn(uprobe_opcode_t *insn); extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool); extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 26bc2e24e9e3..ca9012930ce7 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -173,6 +173,20 @@ bool __weak is_swbp_insn(uprobe_opcode_t *insn) return *insn == UPROBE_SWBP_INSN; } +/** + * is_trap_insn - check if instruction is breakpoint instruction. + * @insn: instruction to be checked. + * Default implementation of is_trap_insn + * Returns true if @insn is a breakpoint instruction. + * + * This function is needed for the case where an architecture has multiple + * trap instructions (like powerpc). + */ +bool __weak is_trap_insn(uprobe_opcode_t *insn) +{ + return is_swbp_insn(insn); +} + static void copy_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *opcode) { void *kaddr = kmap_atomic(page); @@ -185,6 +199,15 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t uprobe_opcode_t old_opcode; bool is_swbp; + /* + * Note: We only check if the old_opcode is UPROBE_SWBP_INSN here. + * We do not check if it is any other 'trap variant' which could + * be conditional trap instruction such as the one powerpc supports. + * + * The logic is that we do not care if the underlying instruction + * is a trap variant; uprobes always wins over any other (gdb) + * breakpoint. + */ copy_opcode(page, vaddr, &old_opcode); is_swbp = is_swbp_insn(&old_opcode); @@ -204,7 +227,7 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t * Expect the breakpoint instruction to be the smallest size instruction for * the architecture. If an arch has variable length instruction and the * breakpoint instruction is not of the smallest length instruction - * supported by that architecture then we need to modify is_swbp_at_addr and + * supported by that architecture then we need to modify is_trap_at_addr and * write_opcode accordingly. This would never be a problem for archs that * have fixed length instructions. */ @@ -550,7 +573,7 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file, goto out; ret = -ENOTSUPP; - if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn)) + if (is_trap_insn((uprobe_opcode_t *)uprobe->arch.insn)) goto out; ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr); @@ -1431,7 +1454,7 @@ static void mmf_recalc_uprobes(struct mm_struct *mm) clear_bit(MMF_HAS_UPROBES, &mm->flags); } -static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr) +static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr) { struct page *page; uprobe_opcode_t opcode; @@ -1452,7 +1475,8 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr) copy_opcode(page, vaddr, &opcode); put_page(page); out: - return is_swbp_insn(&opcode); + /* This needs to return true for any variant of the trap insn */ + return is_trap_insn(&opcode); } static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) @@ -1472,7 +1496,7 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) } if (!uprobe) - *is_swbp = is_swbp_at_addr(mm, bp_vaddr); + *is_swbp = is_trap_at_addr(mm, bp_vaddr); } else { *is_swbp = -EFAULT; } -- cgit v1.2.3 From ab07e807be533d6317ea0971900bdf547962effd Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Fri, 22 Mar 2013 20:48:38 +0530 Subject: uprobes/powerpc: Teach uprobes to ignore gdb breakpoints Powerpc has many trap variants that could be used by entities like gdb. Currently, running gdb on a program being traced by uprobes causes an endless loop since uprobes doesn't understand that the trap was inserted by some other entity and a SIGTRAP needs to be delivered. Teach uprobes to ignore breakpoints that do not belong to it. Signed-off-by: Ananth N Mavinakayanahalli Acked-by: Srikar Dronamraju Signed-off-by: Oleg Nesterov --- arch/powerpc/kernel/uprobes.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c index bc77834dbf43..cb7f63da140c 100644 --- a/arch/powerpc/kernel/uprobes.c +++ b/arch/powerpc/kernel/uprobes.c @@ -30,6 +30,16 @@ #define UPROBE_TRAP_NR UINT_MAX +/** + * is_trap_insn - check if the instruction is a trap variant + * @insn: instruction to be checked. + * Returns true if @insn is a trap variant. + */ +bool is_trap_insn(uprobe_opcode_t *insn) +{ + return (is_trap(*insn)); +} + /** * arch_uprobe_analyze_insn * @mm: the probed address space. -- cgit v1.2.3 From 3c9eb54f71fed86b761a6da4ad3eedc9566ceb8d Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Fri, 22 Mar 2013 20:49:46 +0530 Subject: uprobes/powerpc: Remove additional trap instruction check prepare_uprobe() already checks if the underlying unstruction (on file) is a trap variant. We don't need to check this again. Signed-off-by: Ananth N Mavinakayanahalli Acked-by: Srikar Dronamraju Signed-off-by: Oleg Nesterov --- arch/powerpc/kernel/uprobes.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c index cb7f63da140c..2ecdbe304f46 100644 --- a/arch/powerpc/kernel/uprobes.c +++ b/arch/powerpc/kernel/uprobes.c @@ -53,12 +53,6 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, if (addr & 0x03) return -EINVAL; - /* - * We currently don't support a uprobe on an already - * existing breakpoint instruction underneath - */ - if (is_trap(auprobe->ainsn)) - return -ENOTSUPP; return 0; } -- cgit v1.2.3 From ab0d805c7b9089f9a9b291f33ab95301d6604868 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 24 Mar 2013 18:24:37 +0100 Subject: uprobes: Turn copy_opcode() into copy_from_page() No functional changes. Rename copy_opcode() into copy_from_page() and add the new "int len" argument to make it more more generic for the new users. Signed-off-by: Oleg Nesterov Acked-by: Anton Arapov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index ca9012930ce7..850eb9eb179b 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -187,10 +187,10 @@ bool __weak is_trap_insn(uprobe_opcode_t *insn) return is_swbp_insn(insn); } -static void copy_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *opcode) +static void copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len) { void *kaddr = kmap_atomic(page); - memcpy(opcode, kaddr + (vaddr & ~PAGE_MASK), UPROBE_SWBP_INSN_SIZE); + memcpy(dst, kaddr + (vaddr & ~PAGE_MASK), len); kunmap_atomic(kaddr); } @@ -208,7 +208,7 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t * is a trap variant; uprobes always wins over any other (gdb) * breakpoint. */ - copy_opcode(page, vaddr, &old_opcode); + copy_from_page(page, vaddr, &old_opcode, UPROBE_SWBP_INSN_SIZE); is_swbp = is_swbp_insn(&old_opcode); if (is_swbp_insn(new_opcode)) { @@ -1472,7 +1472,7 @@ static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr) if (result < 0) return result; - copy_opcode(page, vaddr, &opcode); + copy_from_page(page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); put_page(page); out: /* This needs to return true for any variant of the trap insn */ -- cgit v1.2.3 From 2edb7b5574d447354202ba365fb8ca6a1bac1d1c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 24 Mar 2013 18:37:48 +0100 Subject: uprobes: Change __copy_insn() to use copy_from_page() Change __copy_insn() to use copy_from_page() and simplify the code. Signed-off-by: Oleg Nesterov Acked-by: Anton Arapov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 850eb9eb179b..ba6acfe7c0de 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -500,30 +500,21 @@ __copy_insn(struct address_space *mapping, struct file *filp, char *insn, unsigned long nbytes, loff_t offset) { struct page *page; - void *vaddr; - unsigned long off; - pgoff_t idx; if (!filp) return -EINVAL; if (!mapping->a_ops->readpage) return -EIO; - - idx = offset >> PAGE_CACHE_SHIFT; - off = offset & ~PAGE_MASK; - /* * Ensure that the page that has the original instruction is * populated and in page-cache. */ - page = read_mapping_page(mapping, idx, filp); + page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp); if (IS_ERR(page)) return PTR_ERR(page); - vaddr = kmap_atomic(page); - memcpy(insn, vaddr + off, nbytes); - kunmap_atomic(vaddr); + copy_from_page(page, offset, insn, nbytes); page_cache_release(page); return 0; -- cgit v1.2.3 From 98763a1bb1515f8a8d7f1d9ae42604e19872364b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 24 Mar 2013 18:45:44 +0100 Subject: uprobes: Kill the unnecesary filp != NULL check in __copy_insn() __copy_insn(filp) can only be called after valid_vma() returns T, vma->vm_file passed as "filp" can not be NULL. Signed-off-by: Oleg Nesterov Acked-by: Anton Arapov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index ba6acfe7c0de..093866547fe3 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -501,9 +501,6 @@ __copy_insn(struct address_space *mapping, struct file *filp, char *insn, { struct page *page; - if (!filp) - return -EINVAL; - if (!mapping->a_ops->readpage) return -EIO; /* -- cgit v1.2.3 From 5669ccee21d87622f30a724b3fe0d04ec5b0afae Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 24 Mar 2013 18:58:04 +0100 Subject: uprobes: Introduce copy_to_page() Extract the kmap_atomic/memcpy/kunmap_atomic code from xol_get_insn_slot() into the new simple helper, copy_to_page(). It will have more users soon. Signed-off-by: Oleg Nesterov Acked-by: Anton Arapov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 093866547fe3..b8255eaca190 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -194,6 +194,13 @@ static void copy_from_page(struct page *page, unsigned long vaddr, void *dst, in kunmap_atomic(kaddr); } +static void copy_to_page(struct page *page, unsigned long vaddr, const void *src, int len) +{ + void *kaddr = kmap_atomic(page); + memcpy(kaddr + (vaddr & ~PAGE_MASK), src, len); + kunmap_atomic(kaddr); +} + static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *new_opcode) { uprobe_opcode_t old_opcode; @@ -1227,9 +1234,7 @@ static unsigned long xol_take_insn_slot(struct xol_area *area) static unsigned long xol_get_insn_slot(struct uprobe *uprobe) { struct xol_area *area; - unsigned long offset; unsigned long xol_vaddr; - void *vaddr; area = get_xol_area(); if (!area) @@ -1240,10 +1245,7 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe) return 0; /* Initialize the slot */ - offset = xol_vaddr & ~PAGE_MASK; - vaddr = kmap_atomic(area->page); - memcpy(vaddr + offset, uprobe->arch.insn, MAX_UINSN_BYTES); - kunmap_atomic(vaddr); + copy_to_page(area->page, xol_vaddr, uprobe->arch.insn, MAX_UINSN_BYTES); /* * We probably need flush_icache_user_range() but it needs vma. * This should work on supported architectures too. -- cgit v1.2.3 From 3f47107c5c2972ca47f216889080f6ef818b25e3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 24 Mar 2013 19:04:36 +0100 Subject: uprobes: Change write_opcode() to use copy_*page() Change write_opcode() to use copy_highpage() + copy_to_page() and simplify the code. Signed-off-by: Oleg Nesterov Acked-by: Anton Arapov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index b8255eaca190..7312503caf2e 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -255,7 +255,6 @@ static int write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t opcode) { struct page *old_page, *new_page; - void *vaddr_old, *vaddr_new; struct vm_area_struct *vma; int ret; @@ -276,15 +275,8 @@ retry: __SetPageUptodate(new_page); - /* copy the page now that we've got it stable */ - vaddr_old = kmap_atomic(old_page); - vaddr_new = kmap_atomic(new_page); - - memcpy(vaddr_new, vaddr_old, PAGE_SIZE); - memcpy(vaddr_new + (vaddr & ~PAGE_MASK), &opcode, UPROBE_SWBP_INSN_SIZE); - - kunmap_atomic(vaddr_new); - kunmap_atomic(vaddr_old); + copy_highpage(new_page, old_page); + copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); ret = anon_vma_prepare(vma); if (ret) -- cgit v1.2.3 From ea024870cf10687b3fded66a9deb6253888f30b7 Mon Sep 17 00:00:00 2001 From: Anton Arapov Date: Wed, 3 Apr 2013 18:00:31 +0200 Subject: uretprobes: Introduce uprobe_consumer->ret_handler() Enclose return probes implementation, introduce ->ret_handler() and update existing code to rely on ->handler() *and* ->ret_handler() for uprobe and uretprobe respectively. Signed-off-by: Anton Arapov Acked-by: Srikar Dronamraju Signed-off-by: Oleg Nesterov --- include/linux/uprobes.h | 3 +++ kernel/events/uprobes.c | 17 ++++++++++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 19612881399a..5c8d3290df41 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -46,6 +46,9 @@ enum uprobe_filter_ctx { struct uprobe_consumer { int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs); + int (*ret_handler)(struct uprobe_consumer *self, + unsigned long func, + struct pt_regs *regs); bool (*filter)(struct uprobe_consumer *self, enum uprobe_filter_ctx ctx, struct mm_struct *mm); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 7312503caf2e..eb384e90ac92 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -838,6 +838,14 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer * struct uprobe *uprobe; int ret; + /* Uprobe must have at least one set consumer */ + if (!uc->handler && !uc->ret_handler) + return -EINVAL; + + /* TODO: Implement return probes */ + if (uc->ret_handler) + return -ENOSYS; + /* Racy, just to catch the obvious mistakes */ if (offset > i_size_read(inode)) return -EINVAL; @@ -1497,10 +1505,13 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) down_read(&uprobe->register_rwsem); for (uc = uprobe->consumers; uc; uc = uc->next) { - int rc = uc->handler(uc, regs); + int rc = 0; - WARN(rc & ~UPROBE_HANDLER_MASK, - "bad rc=0x%x from %pf()\n", rc, uc->handler); + if (uc->handler) { + rc = uc->handler(uc, regs); + WARN(rc & ~UPROBE_HANDLER_MASK, + "bad rc=0x%x from %pf()\n", rc, uc->handler); + } remove &= rc; } -- cgit v1.2.3 From e78aebfd27256ca59ccd3e6cf62cfad2a80e02d3 Mon Sep 17 00:00:00 2001 From: Anton Arapov Date: Wed, 3 Apr 2013 18:00:32 +0200 Subject: uretprobes: Reserve the first slot in xol_vma for trampoline Allocate trampoline page, as the very first one in uprobed task xol area, and fill it with breakpoint opcode. Also introduce get_trampoline_vaddr() helper, to wrap the trampoline address extraction from area->vaddr. That removes confusion and eases the debug experience in case ->vaddr notion will be changed. Signed-off-by: Anton Arapov Acked-by: Srikar Dronamraju Signed-off-by: Oleg Nesterov --- kernel/events/uprobes.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index eb384e90ac92..d345b7c6cb2d 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1132,6 +1132,7 @@ static struct xol_area *get_xol_area(void) { struct mm_struct *mm = current->mm; struct xol_area *area; + uprobe_opcode_t insn = UPROBE_SWBP_INSN; area = mm->uprobes_state.xol_area; if (area) @@ -1149,7 +1150,12 @@ static struct xol_area *get_xol_area(void) if (!area->page) goto free_bitmap; + /* allocate first slot of task's xol_area for the return probes */ + set_bit(0, area->bitmap); + copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE); + atomic_set(&area->slot_count, 1); init_waitqueue_head(&area->wq); + if (!xol_add_vma(area)) return area; @@ -1346,6 +1352,25 @@ static struct uprobe_task *get_utask(void) return current->utask; } +/* + * Current area->vaddr notion assume the trampoline address is always + * equal area->vaddr. + * + * Returns -1 in case the xol_area is not allocated. + */ +static unsigned long get_trampoline_vaddr(void) +{ + struct xol_area *area; + unsigned long trampoline_vaddr = -1; + + area = current->mm->uprobes_state.xol_area; + smp_read_barrier_depends(); + if (area) + trampoline_vaddr = area->vaddr; + + return trampoline_vaddr; +} + /* Prepare to single-step probed instruction out of line. */ static int pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr) -- cgit v1.2.3 From 791eca10107f2886c1915d91c99a3b022a75909c Mon Sep 17 00:00:00 2001 From: Anton Arapov Date: Wed, 3 Apr 2013 18:00:33 +0200 Subject: uretprobes/x86: Hijack return address Hijack the return address and replace it with a trampoline address. Signed-off-by: Anton Arapov Acked-by: Srikar Dronamraju Signed-off-by: Oleg Nesterov --- arch/x86/include/asm/uprobes.h | 1 + arch/x86/kernel/uprobes.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index 8ff8be7835ab..6e5197910fd8 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -55,4 +55,5 @@ extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs); extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk); extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data); extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs); +extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs); #endif /* _ASM_UPROBES_H */ diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 0ba4cfb4f412..2ed845928b5f 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -697,3 +697,32 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) send_sig(SIGTRAP, current, 0); return ret; } + +unsigned long +arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs) +{ + int rasize, ncopied; + unsigned long orig_ret_vaddr = 0; /* clear high bits for 32-bit apps */ + + rasize = is_ia32_task() ? 4 : 8; + ncopied = copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp, rasize); + if (unlikely(ncopied)) + return -1; + + /* check whether address has been already hijacked */ + if (orig_ret_vaddr == trampoline_vaddr) + return orig_ret_vaddr; + + ncopied = copy_to_user((void __user *)regs->sp, &trampoline_vaddr, rasize); + if (likely(!ncopied)) + return orig_ret_vaddr; + + if (ncopied != rasize) { + pr_err("uprobe: return address clobbered: pid=%d, %%sp=%#lx, " + "%%ip=%#lx\n", current->pid, regs->sp, regs->ip); + + force_sig_info(SIGSEGV, SEND_SIG_FORCED, current); + } + + return -1; +} -- cgit v1.2.3 From f15706b79d6f71e016cd06afa21ee31500029067 Mon Sep 17 00:00:00 2001 From: Anton Arapov Date: Wed, 3 Apr 2013 18:00:34 +0200 Subject: uretprobes/powerpc: Hijack return address Hijack the return address and replace it with a trampoline address. PowerPC implementation. Signed-off-by: Anton Arapov Acked-by: Ananth N Mavinakayanahalli Acked-by: Srikar Dronamraju Signed-off-by: Oleg Nesterov --- arch/powerpc/include/asm/uprobes.h | 1 + arch/powerpc/kernel/uprobes.c | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h index b532060d0916..23016020915e 100644 --- a/arch/powerpc/include/asm/uprobes.h +++ b/arch/powerpc/include/asm/uprobes.h @@ -51,4 +51,5 @@ extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs); extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk); extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data); extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs); +extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs); #endif /* _ASM_UPROBES_H */ diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c index 2ecdbe304f46..59f419b935f2 100644 --- a/arch/powerpc/kernel/uprobes.c +++ b/arch/powerpc/kernel/uprobes.c @@ -192,3 +192,16 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) return false; } + +unsigned long +arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs) +{ + unsigned long orig_ret_vaddr; + + orig_ret_vaddr = regs->link; + + /* Replace the return addr with trampoline addr */ + regs->link = trampoline_vaddr; + + return orig_ret_vaddr; +} -- cgit v1.2.3 From 0dfd0eb8e4d72ded8b21f4fee74ba5547408cbe9 Mon Sep 17 00:00:00 2001 From: Anton Arapov Date: Wed, 3 Apr 2013 18:00:35 +0200 Subject: uretprobes: Return probe entry, prepare_uretprobe() When a uprobe with return probe consumer is hit, prepare_uretprobe() function is invoked. It creates return_instance, hijacks return address and replaces it with the trampoline. * Return instances are kept as stack per uprobed task. * Return instance is chained, when the original return address is trampoline's page vaddr (e.g. recursive call of the probed function). Signed-off-by: Anton Arapov Acked-by: Srikar Dronamraju Signed-off-by: Oleg Nesterov --- include/linux/uprobes.h | 1 + kernel/events/uprobes.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 92 insertions(+), 1 deletion(-) diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 5c8d3290df41..b0507f24eeb0 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -71,6 +71,7 @@ struct uprobe_task { enum uprobe_task_state state; struct arch_uprobe_task autask; + struct return_instance *return_instances; struct uprobe *active_uprobe; unsigned long xol_vaddr; diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index d345b7c6cb2d..3798947b3b58 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -75,6 +75,15 @@ struct uprobe { struct arch_uprobe arch; }; +struct return_instance { + struct uprobe *uprobe; + unsigned long func; + unsigned long orig_ret_vaddr; /* original return address */ + bool chained; /* true, if instance is nested */ + + struct return_instance *next; /* keep as stack */ +}; + /* * valid_vma: Verify if the specified vma is an executable vma * Relax restrictions while unregistering: vm_flags might have @@ -1317,6 +1326,7 @@ unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs) void uprobe_free_utask(struct task_struct *t) { struct uprobe_task *utask = t->utask; + struct return_instance *ri, *tmp; if (!utask) return; @@ -1324,6 +1334,15 @@ void uprobe_free_utask(struct task_struct *t) if (utask->active_uprobe) put_uprobe(utask->active_uprobe); + ri = utask->return_instances; + while (ri) { + tmp = ri; + ri = ri->next; + + put_uprobe(tmp->uprobe); + kfree(tmp); + } + xol_free_insn_slot(t); kfree(utask); t->utask = NULL; @@ -1371,6 +1390,65 @@ static unsigned long get_trampoline_vaddr(void) return trampoline_vaddr; } +static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs) +{ + struct return_instance *ri; + struct uprobe_task *utask; + unsigned long orig_ret_vaddr, trampoline_vaddr; + bool chained = false; + + if (!get_xol_area()) + return; + + utask = get_utask(); + if (!utask) + return; + + ri = kzalloc(sizeof(struct return_instance), GFP_KERNEL); + if (!ri) + goto fail; + + trampoline_vaddr = get_trampoline_vaddr(); + orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs); + if (orig_ret_vaddr == -1) + goto fail; + + /* + * We don't want to keep trampoline address in stack, rather keep the + * original return address of first caller thru all the consequent + * instances. This also makes breakpoint unwrapping easier. + */ + if (orig_ret_vaddr == trampoline_vaddr) { + if (!utask->return_instances) { + /* + * This situation is not possible. Likely we have an + * attack from user-space. + */ + pr_warn("uprobe: unable to set uretprobe pid/tgid=%d/%d\n", + current->pid, current->tgid); + goto fail; + } + + chained = true; + orig_ret_vaddr = utask->return_instances->orig_ret_vaddr; + } + + atomic_inc(&uprobe->ref); + ri->uprobe = uprobe; + ri->func = instruction_pointer(regs); + ri->orig_ret_vaddr = orig_ret_vaddr; + ri->chained = chained; + + /* add instance to the stack */ + ri->next = utask->return_instances; + utask->return_instances = ri; + + return; + + fail: + kfree(ri); +} + /* Prepare to single-step probed instruction out of line. */ static int pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr) @@ -1527,6 +1605,7 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) { struct uprobe_consumer *uc; int remove = UPROBE_HANDLER_REMOVE; + bool need_prep = false; /* prepare return uprobe, when needed */ down_read(&uprobe->register_rwsem); for (uc = uprobe->consumers; uc; uc = uc->next) { @@ -1537,9 +1616,16 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) WARN(rc & ~UPROBE_HANDLER_MASK, "bad rc=0x%x from %pf()\n", rc, uc->handler); } + + if (uc->ret_handler) + need_prep = true; + remove &= rc; } + if (need_prep && !remove) + prepare_uretprobe(uprobe, regs); /* put bp at return */ + if (remove && uprobe->consumers) { WARN_ON(!uprobe_is_active(uprobe)); unapply_uprobe(uprobe, current->mm); @@ -1658,7 +1744,11 @@ void uprobe_notify_resume(struct pt_regs *regs) */ int uprobe_pre_sstep_notifier(struct pt_regs *regs) { - if (!current->mm || !test_bit(MMF_HAS_UPROBES, ¤t->mm->flags)) + if (!current->mm) + return 0; + + if (!test_bit(MMF_HAS_UPROBES, ¤t->mm->flags) && + (!current->utask || !current->utask->return_instances)) return 0; set_thread_flag(TIF_UPROBE); -- cgit v1.2.3 From fec8898d86ad0fb4d2161fc89885fa52da1e8b72 Mon Sep 17 00:00:00 2001 From: Anton Arapov Date: Wed, 3 Apr 2013 18:00:36 +0200 Subject: uretprobes: Return probe exit, invoke handlers Uretprobe handlers are invoked when the trampoline is hit, on completion the trampoline is replaced with the saved return address and the uretprobe instance deleted. TODO: handle_trampoline() assumes that ->return_instances is always valid. We should teach it to handle longjmp() which can invalidate the pending return_instance's. This is nontrivial, we will try to do this in a separate series. Signed-off-by: Anton Arapov Acked-by: Srikar Dronamraju Signed-off-by: Oleg Nesterov --- kernel/events/uprobes.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 3798947b3b58..65429ad2ce51 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1633,6 +1633,62 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) up_read(&uprobe->register_rwsem); } +static void +handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs) +{ + struct uprobe *uprobe = ri->uprobe; + struct uprobe_consumer *uc; + + down_read(&uprobe->register_rwsem); + for (uc = uprobe->consumers; uc; uc = uc->next) { + if (uc->ret_handler) + uc->ret_handler(uc, ri->func, regs); + } + up_read(&uprobe->register_rwsem); +} + +static bool handle_trampoline(struct pt_regs *regs) +{ + struct uprobe_task *utask; + struct return_instance *ri, *tmp; + bool chained; + + utask = current->utask; + if (!utask) + return false; + + ri = utask->return_instances; + if (!ri) + return false; + + /* + * TODO: we should throw out return_instance's invalidated by + * longjmp(), currently we assume that the probed function always + * returns. + */ + instruction_pointer_set(regs, ri->orig_ret_vaddr); + + for (;;) { + handle_uretprobe_chain(ri, regs); + + chained = ri->chained; + put_uprobe(ri->uprobe); + + tmp = ri; + ri = ri->next; + kfree(tmp); + + if (!chained) + break; + + BUG_ON(!ri); + } + + utask->return_instances = ri; + + return true; +} + /* * Run handler and ask thread to singlestep. * Ensure all non-fatal signals cannot interrupt thread while it singlesteps. @@ -1644,8 +1700,15 @@ static void handle_swbp(struct pt_regs *regs) int uninitialized_var(is_swbp); bp_vaddr = uprobe_get_swbp_addr(regs); - uprobe = find_active_uprobe(bp_vaddr, &is_swbp); + if (bp_vaddr == get_trampoline_vaddr()) { + if (handle_trampoline(regs)) + return; + pr_warn("uprobe: unable to handle uretprobe pid/tgid=%d/%d\n", + current->pid, current->tgid); + } + + uprobe = find_active_uprobe(bp_vaddr, &is_swbp); if (!uprobe) { if (is_swbp > 0) { /* No matching uprobe; signal SIGTRAP. */ -- cgit v1.2.3 From ded49c55309a37129dc30a5f0e85b8a64e5c1716 Mon Sep 17 00:00:00 2001 From: Anton Arapov Date: Wed, 3 Apr 2013 18:00:37 +0200 Subject: uretprobes: Limit the depth of return probe nestedness Unlike the kretprobes we can't trust userspace, thus must have protection from user space attacks. User-space have "unlimited" stack, and this patch limits the return probes nestedness as a simple remedy for it. Note that this implementation leaks return_instance on siglongjmp until exit()/exec(). The intention is to have KISS and bare minimum solution for the initial implementation in order to not complicate the uretprobes code. In the future we may come up with more sophisticated solution that remove this depth limitation. It is not easy task and lays beyond this patchset. Signed-off-by: Anton Arapov Acked-by: Srikar Dronamraju Signed-off-by: Oleg Nesterov --- include/linux/uprobes.h | 3 +++ kernel/events/uprobes.c | 11 +++++++++++ 2 files changed, 14 insertions(+) diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index b0507f24eeb0..06f28beed7c2 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -38,6 +38,8 @@ struct inode; #define UPROBE_HANDLER_REMOVE 1 #define UPROBE_HANDLER_MASK 1 +#define MAX_URETPROBE_DEPTH 64 + enum uprobe_filter_ctx { UPROBE_FILTER_REGISTER, UPROBE_FILTER_UNREGISTER, @@ -72,6 +74,7 @@ struct uprobe_task { struct arch_uprobe_task autask; struct return_instance *return_instances; + unsigned int depth; struct uprobe *active_uprobe; unsigned long xol_vaddr; diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 65429ad2ce51..6ab00e090c87 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1404,6 +1404,13 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs) if (!utask) return; + if (utask->depth >= MAX_URETPROBE_DEPTH) { + printk_ratelimited(KERN_INFO "uprobe: omit uretprobe due to" + " nestedness limit pid/tgid=%d/%d\n", + current->pid, current->tgid); + return; + } + ri = kzalloc(sizeof(struct return_instance), GFP_KERNEL); if (!ri) goto fail; @@ -1439,6 +1446,8 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs) ri->orig_ret_vaddr = orig_ret_vaddr; ri->chained = chained; + utask->depth++; + /* add instance to the stack */ ri->next = utask->return_instances; utask->return_instances = ri; @@ -1681,6 +1690,8 @@ static bool handle_trampoline(struct pt_regs *regs) if (!chained) break; + utask->depth--; + BUG_ON(!ri); } -- cgit v1.2.3 From a0d60aef4be10c498809c2985fc9c28fd503880d Mon Sep 17 00:00:00 2001 From: Anton Arapov Date: Wed, 3 Apr 2013 18:00:38 +0200 Subject: uretprobes: Remove -ENOSYS as return probes implemented Enclose return probes implementation. Signed-off-by: Anton Arapov Acked-by: Srikar Dronamraju Signed-off-by: Oleg Nesterov --- kernel/events/uprobes.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 6ab00e090c87..f3569747d629 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -851,10 +851,6 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer * if (!uc->handler && !uc->ret_handler) return -EINVAL; - /* TODO: Implement return probes */ - if (uc->ret_handler) - return -ENOSYS; - /* Racy, just to catch the obvious mistakes */ if (offset > i_size_read(inode)) return -EINVAL; -- cgit v1.2.3 From decc6bfb49d1eda6cec67d79b73a73fa3aaad6e5 Mon Sep 17 00:00:00 2001 From: Anton Arapov Date: Wed, 3 Apr 2013 18:00:39 +0200 Subject: uretprobes: Documentation update add the uretprobe syntax and update an example Signed-off-by: Anton Arapov Acked-by: Srikar Dronamraju Signed-off-by: Oleg Nesterov --- Documentation/trace/uprobetracer.txt | 114 ++++++++++++++++++++--------------- 1 file changed, 67 insertions(+), 47 deletions(-) diff --git a/Documentation/trace/uprobetracer.txt b/Documentation/trace/uprobetracer.txt index 24ce6823a09e..d9c3e682312c 100644 --- a/Documentation/trace/uprobetracer.txt +++ b/Documentation/trace/uprobetracer.txt @@ -1,6 +1,8 @@ - Uprobe-tracer: Uprobe-based Event Tracing - ========================================= - Documentation written by Srikar Dronamraju + Uprobe-tracer: Uprobe-based Event Tracing + ========================================= + + Documentation written by Srikar Dronamraju + Overview -------- @@ -13,78 +15,94 @@ current_tracer. Instead of that, add probe points via /sys/kernel/debug/tracing/events/uprobes//enabled. However unlike kprobe-event tracer, the uprobe event interface expects the -user to calculate the offset of the probepoint in the object +user to calculate the offset of the probepoint in the object. Synopsis of uprobe_tracer ------------------------- - p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] : Set a probe + p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] : Set a uprobe + r[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] : Set a return uprobe (uretprobe) + -:[GRP/]EVENT : Clear uprobe or uretprobe event - GRP : Group name. If omitted, use "uprobes" for it. - EVENT : Event name. If omitted, the event name is generated - based on SYMBOL+offs. - PATH : path to an executable or a library. - SYMBOL[+offs] : Symbol+offset where the probe is inserted. + GRP : Group name. If omitted, "uprobes" is the default value. + EVENT : Event name. If omitted, the event name is generated based + on SYMBOL+offs. + PATH : Path to an executable or a library. + SYMBOL[+offs] : Symbol+offset where the probe is inserted. - FETCHARGS : Arguments. Each probe can have up to 128 args. - %REG : Fetch register REG + FETCHARGS : Arguments. Each probe can have up to 128 args. + %REG : Fetch register REG Event Profiling --------------- - You can check the total number of probe hits and probe miss-hits via +You can check the total number of probe hits and probe miss-hits via /sys/kernel/debug/tracing/uprobe_profile. - The first column is event name, the second is the number of probe hits, +The first column is event name, the second is the number of probe hits, the third is the number of probe miss-hits. Usage examples -------------- -To add a probe as a new event, write a new definition to uprobe_events -as below. + * Add a probe as a new uprobe event, write a new definition to uprobe_events +as below: (sets a uprobe at an offset of 0x4245c0 in the executable /bin/bash) + + echo 'p: /bin/bash:0x4245c0' > /sys/kernel/debug/tracing/uprobe_events + + * Add a probe as a new uretprobe event: + + echo 'r: /bin/bash:0x4245c0' > /sys/kernel/debug/tracing/uprobe_events + + * Unset registered event: - echo 'p: /bin/bash:0x4245c0' > /sys/kernel/debug/tracing/uprobe_events + echo '-:bash_0x4245c0' >> /sys/kernel/debug/tracing/uprobe_events - This sets a uprobe at an offset of 0x4245c0 in the executable /bin/bash + * Print out the events that are registered: - echo > /sys/kernel/debug/tracing/uprobe_events + cat /sys/kernel/debug/tracing/uprobe_events - This clears all probe points. + * Clear all events: -The following example shows how to dump the instruction pointer and %ax -a register at the probed text address. Here we are trying to probe -function zfree in /bin/zsh + echo > /sys/kernel/debug/tracing/uprobe_events + +Following example shows how to dump the instruction pointer and %ax register +at the probed text address. Probe zfree function in /bin/zsh: # cd /sys/kernel/debug/tracing/ - # cat /proc/`pgrep zsh`/maps | grep /bin/zsh | grep r-xp + # cat /proc/`pgrep zsh`/maps | grep /bin/zsh | grep r-xp 00400000-0048a000 r-xp 00000000 08:03 130904 /bin/zsh # objdump -T /bin/zsh | grep -w zfree 0000000000446420 g DF .text 0000000000000012 Base zfree -0x46420 is the offset of zfree in object /bin/zsh that is loaded at -0x00400000. Hence the command to probe would be : + 0x46420 is the offset of zfree in object /bin/zsh that is loaded at + 0x00400000. Hence the command to uprobe would be: + + # echo 'p:zfree_entry /bin/zsh:0x46420 %ip %ax' > uprobe_events + + And the same for the uretprobe would be: - # echo 'p /bin/zsh:0x46420 %ip %ax' > uprobe_events + # echo 'r:zfree_exit /bin/zsh:0x46420 %ip %ax' >> uprobe_events -Please note: User has to explicitly calculate the offset of the probepoint +Please note: User has to explicitly calculate the offset of the probe-point in the object. We can see the events that are registered by looking at the uprobe_events file. # cat uprobe_events - p:uprobes/p_zsh_0x46420 /bin/zsh:0x00046420 arg1=%ip arg2=%ax + p:uprobes/zfree_entry /bin/zsh:0x00046420 arg1=%ip arg2=%ax + r:uprobes/zfree_exit /bin/zsh:0x00046420 arg1=%ip arg2=%ax -The format of events can be seen by viewing the file events/uprobes/p_zsh_0x46420/format +Format of events can be seen by viewing the file events/uprobes/zfree_entry/format - # cat events/uprobes/p_zsh_0x46420/format - name: p_zsh_0x46420 + # cat events/uprobes/zfree_entry/format + name: zfree_entry ID: 922 format: - field:unsigned short common_type; offset:0; size:2; signed:0; - field:unsigned char common_flags; offset:2; size:1; signed:0; - field:unsigned char common_preempt_count; offset:3; size:1; signed:0; - field:int common_pid; offset:4; size:4; signed:1; - field:int common_padding; offset:8; size:4; signed:1; + field:unsigned short common_type; offset:0; size:2; signed:0; + field:unsigned char common_flags; offset:2; size:1; signed:0; + field:unsigned char common_preempt_count; offset:3; size:1; signed:0; + field:int common_pid; offset:4; size:4; signed:1; + field:int common_padding; offset:8; size:4; signed:1; - field:unsigned long __probe_ip; offset:12; size:4; signed:0; - field:u32 arg1; offset:16; size:4; signed:0; - field:u32 arg2; offset:20; size:4; signed:0; + field:unsigned long __probe_ip; offset:12; size:4; signed:0; + field:u32 arg1; offset:16; size:4; signed:0; + field:u32 arg2; offset:20; size:4; signed:0; print fmt: "(%lx) arg1=%lx arg2=%lx", REC->__probe_ip, REC->arg1, REC->arg2 @@ -94,6 +112,7 @@ events, you need to enable it by: # echo 1 > events/uprobes/enable Lets disable the event after sleeping for some time. + # sleep 20 # echo 0 > events/uprobes/enable @@ -104,10 +123,11 @@ And you can see the traced information via /sys/kernel/debug/tracing/trace. # # TASK-PID CPU# TIMESTAMP FUNCTION # | | | | | - zsh-24842 [006] 258544.995456: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79 - zsh-24842 [007] 258545.000270: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79 - zsh-24842 [002] 258545.043929: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79 - zsh-24842 [004] 258547.046129: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79 - -Each line shows us probes were triggered for a pid 24842 with ip being -0x446421 and contents of ax register being 79. + zsh-24842 [006] 258544.995456: zfree_entry: (0x446420) arg1=446420 arg2=79 + zsh-24842 [007] 258545.000270: zfree_exit: (0x446540 <- 0x446420) arg1=446540 arg2=0 + zsh-24842 [002] 258545.043929: zfree_entry: (0x446420) arg1=446420 arg2=79 + zsh-24842 [004] 258547.046129: zfree_exit: (0x446540 <- 0x446420) arg1=446540 arg2=0 + +Output shows us uprobe was triggered for a pid 24842 with ip being 0x446420 +and contents of ax register being 79. And uretprobe was triggered with ip at +0x446540 with counterpart function entry at 0x446420. -- cgit v1.2.3 From 07720b63a964851928fa5d8b00ee5270d66b94f7 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 28 Mar 2013 18:01:04 +0100 Subject: uprobes/tracing: Kill the pointless task_pt_regs() calls uprobe_trace_func() and uprobe_perf_func() do not need task_pt_regs(), we already have "struct pt_regs *regs". Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Tested-by: Anton Arapov --- kernel/trace/trace_uprobe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 8dad2a92dee9..af5b7735cebf 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -507,7 +507,7 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) return 0; entry = ring_buffer_event_data(event); - entry->ip = instruction_pointer(task_pt_regs(current)); + entry->ip = instruction_pointer(regs); data = (u8 *)&entry[1]; for (i = 0; i < tu->nr_args; i++) call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); @@ -777,7 +777,7 @@ static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) if (!entry) goto out; - entry->ip = instruction_pointer(task_pt_regs(current)); + entry->ip = instruction_pointer(regs); data = (u8 *)&entry[1]; for (i = 0; i < tu->nr_args; i++) call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); -- cgit v1.2.3 From 456fdbcb8648a20f56977efbc6f13e28936fcf0b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 28 Mar 2013 18:58:11 +0100 Subject: uprobes/tracing: Kill the pointless seq_print_ip_sym() call seq_print_ip_sym(ip) in print_uprobe_event() is pointless, kallsyms_lookup(ip) can not resolve a user-space address. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Tested-by: Anton Arapov --- kernel/trace/trace_uprobe.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index af5b7735cebf..8e009016e4ba 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -531,13 +531,7 @@ print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *e field = (struct uprobe_trace_entry_head *)iter->ent; tu = container_of(event, struct trace_uprobe, call.event); - if (!trace_seq_printf(s, "%s: (", tu->call.name)) - goto partial; - - if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET)) - goto partial; - - if (!trace_seq_puts(s, ")")) + if (!trace_seq_printf(s, "%s: (0x%lx)", tu->call.name, field->ip)) goto partial; data = (u8 *)&field[1]; -- cgit v1.2.3 From 0e3853d202e8b2720bc4c674dc58849b2662c8f8 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 28 Mar 2013 19:19:11 +0100 Subject: uprobes/tracing: Kill the pointless local_save_flags/preempt_count calls uprobe_trace_func() is never called with irqs or preemption disabled, no need to ask preempt_count() or local_save_flags(). Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Acked-by: Masami Hiramatsu Tested-by: Anton Arapov --- kernel/trace/trace_uprobe.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 8e009016e4ba..43d258db998e 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -492,17 +492,13 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) struct ring_buffer_event *event; struct ring_buffer *buffer; u8 *data; - int size, i, pc; - unsigned long irq_flags; + int size, i; struct ftrace_event_call *call = &tu->call; - local_save_flags(irq_flags); - pc = preempt_count(); - size = sizeof(*entry) + tu->size; event = trace_current_buffer_lock_reserve(&buffer, call->event.type, - size, irq_flags, pc); + size, 0, 0); if (!event) return 0; @@ -513,7 +509,7 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); if (!filter_current_check_discard(buffer, call, entry, event)) - trace_buffer_unlock_commit(buffer, event, irq_flags, pc); + trace_buffer_unlock_commit(buffer, event, 0, 0); return 0; } -- cgit v1.2.3 From 457d1772f1c1bcf37b2ae7fc8f1d6f303d1d5cf9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 29 Mar 2013 18:26:51 +0100 Subject: uprobes/tracing: Generalize struct uprobe_trace_entry_head struct uprobe_trace_entry_head has a single member for reporting, "unsigned long ip". If we want to support uretprobes we need to create another struct which has "func" and "ret_ip" and duplicate a lot of functions, like trace_kprobe.c does. To avoid this copy-and-paste horror we turn ->ip into ->vaddr[] and add couple of trivial helpers to calculate sizeof/data. This uglifies the code a bit, but this allows us to avoid a lot more complications later, when we add the support for ret-probes. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Tested-by: Anton Arapov --- kernel/trace/trace.h | 5 ---- kernel/trace/trace_uprobe.c | 62 ++++++++++++++++++++++++++------------------- 2 files changed, 36 insertions(+), 31 deletions(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 2081971367ea..8bed1dfcb938 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -103,11 +103,6 @@ struct kretprobe_trace_entry_head { unsigned long ret_ip; }; -struct uprobe_trace_entry_head { - struct trace_entry ent; - unsigned long ip; -}; - /* * trace_flag_type is an enumeration that holds different * states when a trace occurs. These are: diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 43d258db998e..49b400368bfd 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -28,6 +28,18 @@ #define UPROBE_EVENT_SYSTEM "uprobes" +struct uprobe_trace_entry_head { + struct trace_entry ent; + unsigned long vaddr[]; +}; + +#define SIZEOF_TRACE_ENTRY(is_return) \ + (sizeof(struct uprobe_trace_entry_head) + \ + sizeof(unsigned long) * (is_return ? 2 : 1)) + +#define DATAOF_TRACE_ENTRY(entry, is_return) \ + ((void*)(entry) + SIZEOF_TRACE_ENTRY(is_return)) + struct trace_uprobe_filter { rwlock_t rwlock; int nr_systemwide; @@ -491,20 +503,19 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) struct uprobe_trace_entry_head *entry; struct ring_buffer_event *event; struct ring_buffer *buffer; - u8 *data; + void *data; int size, i; struct ftrace_event_call *call = &tu->call; - size = sizeof(*entry) + tu->size; - + size = SIZEOF_TRACE_ENTRY(false) + tu->size; event = trace_current_buffer_lock_reserve(&buffer, call->event.type, size, 0, 0); if (!event) return 0; entry = ring_buffer_event_data(event); - entry->ip = instruction_pointer(regs); - data = (u8 *)&entry[1]; + entry->vaddr[0] = instruction_pointer(regs); + data = DATAOF_TRACE_ENTRY(entry, false); for (i = 0; i < tu->nr_args; i++) call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); @@ -518,22 +529,22 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) static enum print_line_t print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *event) { - struct uprobe_trace_entry_head *field; + struct uprobe_trace_entry_head *entry; struct trace_seq *s = &iter->seq; struct trace_uprobe *tu; u8 *data; int i; - field = (struct uprobe_trace_entry_head *)iter->ent; + entry = (struct uprobe_trace_entry_head *)iter->ent; tu = container_of(event, struct trace_uprobe, call.event); - if (!trace_seq_printf(s, "%s: (0x%lx)", tu->call.name, field->ip)) + if (!trace_seq_printf(s, "%s: (0x%lx)", tu->call.name, entry->vaddr[0])) goto partial; - data = (u8 *)&field[1]; + data = DATAOF_TRACE_ENTRY(entry, false); for (i = 0; i < tu->nr_args; i++) { if (!tu->args[i].type->print(s, tu->args[i].name, - data + tu->args[i].offset, field)) + data + tu->args[i].offset, entry)) goto partial; } @@ -585,16 +596,17 @@ static void probe_event_disable(struct trace_uprobe *tu, int flag) static int uprobe_event_define_fields(struct ftrace_event_call *event_call) { - int ret, i; + int ret, i, size; struct uprobe_trace_entry_head field; - struct trace_uprobe *tu = (struct trace_uprobe *)event_call->data; + struct trace_uprobe *tu = event_call->data; - DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); + DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_IP, 0); + size = SIZEOF_TRACE_ENTRY(false); /* Set argument names as fields */ for (i = 0; i < tu->nr_args; i++) { ret = trace_define_field(event_call, tu->args[i].type->fmttype, tu->args[i].name, - sizeof(field) + tu->args[i].offset, + size + tu->args[i].offset, tu->args[i].type->size, tu->args[i].type->is_signed, FILTER_OTHER); @@ -748,33 +760,31 @@ static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) struct ftrace_event_call *call = &tu->call; struct uprobe_trace_entry_head *entry; struct hlist_head *head; - u8 *data; - int size, __size, i; - int rctx; + unsigned long ip; + void *data; + int size, rctx, i; if (!uprobe_perf_filter(&tu->consumer, 0, current->mm)) return UPROBE_HANDLER_REMOVE; - __size = sizeof(*entry) + tu->size; - size = ALIGN(__size + sizeof(u32), sizeof(u64)); - size -= sizeof(u32); + size = SIZEOF_TRACE_ENTRY(false); + size = ALIGN(size + tu->size + sizeof(u32), sizeof(u64)) - sizeof(u32); if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) return 0; preempt_disable(); - entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); if (!entry) goto out; - entry->ip = instruction_pointer(regs); - data = (u8 *)&entry[1]; + ip = instruction_pointer(regs); + entry->vaddr[0] = ip; + data = DATAOF_TRACE_ENTRY(entry, false); for (i = 0; i < tu->nr_args; i++) call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); head = this_cpu_ptr(call->perf_events); - perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head, NULL); - + perf_trace_buf_submit(entry, size, rctx, ip, 1, regs, head, NULL); out: preempt_enable(); return 0; @@ -784,7 +794,7 @@ static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) static int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data) { - struct trace_uprobe *tu = (struct trace_uprobe *)event->data; + struct trace_uprobe *tu = event->data; switch (type) { case TRACE_REG_REGISTER: -- cgit v1.2.3 From a51cc6041773dd88ff35608f54274bfd6ac68652 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 30 Mar 2013 18:02:12 +0100 Subject: uprobes/tracing: Introduce uprobe_{trace,perf}_print() helpers Extract the output code from uprobe_trace_func() and uprobe_perf_func() into the new helpers, they will be used by ->ret_handler() too. We also add the unused "unsigned long func" argument in advance, to simplify the next changes. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Tested-by: Anton Arapov --- kernel/trace/trace_uprobe.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 49b400368bfd..0c0f0a7d4ff1 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -497,8 +497,8 @@ static const struct file_operations uprobe_profile_ops = { .release = seq_release, }; -/* uprobe handler */ -static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) +static void uprobe_trace_print(struct trace_uprobe *tu, + unsigned long func, struct pt_regs *regs) { struct uprobe_trace_entry_head *entry; struct ring_buffer_event *event; @@ -511,7 +511,7 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) event = trace_current_buffer_lock_reserve(&buffer, call->event.type, size, 0, 0); if (!event) - return 0; + return; entry = ring_buffer_event_data(event); entry->vaddr[0] = instruction_pointer(regs); @@ -521,7 +521,12 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) if (!filter_current_check_discard(buffer, call, entry, event)) trace_buffer_unlock_commit(buffer, event, 0, 0); +} +/* uprobe handler */ +static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) +{ + uprobe_trace_print(tu, 0, regs); return 0; } @@ -754,8 +759,8 @@ static bool uprobe_perf_filter(struct uprobe_consumer *uc, return ret; } -/* uprobe profile handler */ -static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) +static void uprobe_perf_print(struct trace_uprobe *tu, + unsigned long func, struct pt_regs *regs) { struct ftrace_event_call *call = &tu->call; struct uprobe_trace_entry_head *entry; @@ -764,13 +769,10 @@ static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) void *data; int size, rctx, i; - if (!uprobe_perf_filter(&tu->consumer, 0, current->mm)) - return UPROBE_HANDLER_REMOVE; - size = SIZEOF_TRACE_ENTRY(false); size = ALIGN(size + tu->size + sizeof(u32), sizeof(u64)) - sizeof(u32); if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) - return 0; + return; preempt_disable(); entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); @@ -787,6 +789,15 @@ static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) perf_trace_buf_submit(entry, size, rctx, ip, 1, regs, head, NULL); out: preempt_enable(); +} + +/* uprobe profile handler */ +static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) +{ + if (!uprobe_perf_filter(&tu->consumer, 0, current->mm)) + return UPROBE_HANDLER_REMOVE; + + uprobe_perf_print(tu, 0, regs); return 0; } #endif /* CONFIG_PERF_EVENTS */ -- cgit v1.2.3 From c1ae5c75e1034070b203dc9d4ad77ce196166a6c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 30 Mar 2013 18:25:23 +0100 Subject: uprobes/tracing: Introduce is_ret_probe() and uretprobe_dispatcher() Create the new functions we need to support uretprobes, and change alloc_trace_uprobe() to initialize consumer.ret_handler if the new "is_ret" argument is true. Curently this argument is always false, so the new code is never called and is_ret_probe(tu) is false too. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Tested-by: Anton Arapov --- kernel/trace/trace_uprobe.c | 42 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 0c0f0a7d4ff1..72aa45e50d48 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -76,6 +76,8 @@ static DEFINE_MUTEX(uprobe_lock); static LIST_HEAD(uprobe_list); static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs); +static int uretprobe_dispatcher(struct uprobe_consumer *con, + unsigned long func, struct pt_regs *regs); static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter) { @@ -89,11 +91,16 @@ static inline bool uprobe_filter_is_empty(struct trace_uprobe_filter *filter) return !filter->nr_systemwide && list_empty(&filter->perf_events); } +static inline bool is_ret_probe(struct trace_uprobe *tu) +{ + return tu->consumer.ret_handler != NULL; +} + /* * Allocate new trace_uprobe and initialize it (including uprobes). */ static struct trace_uprobe * -alloc_trace_uprobe(const char *group, const char *event, int nargs) +alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret) { struct trace_uprobe *tu; @@ -118,6 +125,8 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs) INIT_LIST_HEAD(&tu->list); tu->consumer.handler = uprobe_dispatcher; + if (is_ret) + tu->consumer.ret_handler = uretprobe_dispatcher; init_trace_uprobe_filter(&tu->filter); return tu; @@ -315,7 +324,7 @@ static int create_trace_uprobe(int argc, char **argv) kfree(tail); } - tu = alloc_trace_uprobe(group, event, argc); + tu = alloc_trace_uprobe(group, event, argc, false); if (IS_ERR(tu)) { pr_info("Failed to allocate trace_uprobe.(%d)\n", (int)PTR_ERR(tu)); ret = PTR_ERR(tu); @@ -530,6 +539,12 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) return 0; } +static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func, + struct pt_regs *regs) +{ + uprobe_trace_print(tu, func, regs); +} + /* Event entry printers */ static enum print_line_t print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *event) @@ -800,6 +815,12 @@ static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) uprobe_perf_print(tu, 0, regs); return 0; } + +static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func, + struct pt_regs *regs) +{ + uprobe_perf_print(tu, func, regs); +} #endif /* CONFIG_PERF_EVENTS */ static @@ -854,6 +875,23 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) return ret; } +static int uretprobe_dispatcher(struct uprobe_consumer *con, + unsigned long func, struct pt_regs *regs) +{ + struct trace_uprobe *tu; + + tu = container_of(con, struct trace_uprobe, consumer); + + if (tu->flags & TP_FLAG_TRACE) + uretprobe_trace_func(tu, func, regs); + +#ifdef CONFIG_PERF_EVENTS + if (tu->flags & TP_FLAG_PROFILE) + uretprobe_perf_func(tu, func, regs); +#endif + return 0; +} + static struct trace_event_functions uprobe_funcs = { .trace = print_uprobe_event }; -- cgit v1.2.3 From 393a736c280d555d9301fc5516d4d401544eb9db Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 30 Mar 2013 18:46:22 +0100 Subject: uprobes/tracing: Make uprobe_{trace,perf}_print() uretprobe-friendly Change uprobe_trace_print() and uprobe_perf_print() to check is_ret_probe() and fill ring_buffer_event accordingly. Also change uprobe_trace_func() and uprobe_perf_func() to not _print() if is_ret_probe() is true. Note that we keep ->handler() nontrivial even for uretprobe, we need this for filtering and for other potential extensions. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Tested-by: Anton Arapov --- kernel/trace/trace_uprobe.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 72aa45e50d48..0ed99a27d122 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -516,15 +516,22 @@ static void uprobe_trace_print(struct trace_uprobe *tu, int size, i; struct ftrace_event_call *call = &tu->call; - size = SIZEOF_TRACE_ENTRY(false) + tu->size; + size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); event = trace_current_buffer_lock_reserve(&buffer, call->event.type, - size, 0, 0); + size + tu->size, 0, 0); if (!event) return; entry = ring_buffer_event_data(event); - entry->vaddr[0] = instruction_pointer(regs); - data = DATAOF_TRACE_ENTRY(entry, false); + if (is_ret_probe(tu)) { + entry->vaddr[0] = func; + entry->vaddr[1] = instruction_pointer(regs); + data = DATAOF_TRACE_ENTRY(entry, true); + } else { + entry->vaddr[0] = instruction_pointer(regs); + data = DATAOF_TRACE_ENTRY(entry, false); + } + for (i = 0; i < tu->nr_args; i++) call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); @@ -535,7 +542,8 @@ static void uprobe_trace_print(struct trace_uprobe *tu, /* uprobe handler */ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) { - uprobe_trace_print(tu, 0, regs); + if (!is_ret_probe(tu)) + uprobe_trace_print(tu, 0, regs); return 0; } @@ -784,7 +792,7 @@ static void uprobe_perf_print(struct trace_uprobe *tu, void *data; int size, rctx, i; - size = SIZEOF_TRACE_ENTRY(false); + size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); size = ALIGN(size + tu->size + sizeof(u32), sizeof(u64)) - sizeof(u32); if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) return; @@ -795,8 +803,15 @@ static void uprobe_perf_print(struct trace_uprobe *tu, goto out; ip = instruction_pointer(regs); - entry->vaddr[0] = ip; - data = DATAOF_TRACE_ENTRY(entry, false); + if (is_ret_probe(tu)) { + entry->vaddr[0] = func; + entry->vaddr[1] = ip; + data = DATAOF_TRACE_ENTRY(entry, true); + } else { + entry->vaddr[0] = ip; + data = DATAOF_TRACE_ENTRY(entry, false); + } + for (i = 0; i < tu->nr_args; i++) call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); @@ -812,7 +827,8 @@ static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) if (!uprobe_perf_filter(&tu->consumer, 0, current->mm)) return UPROBE_HANDLER_REMOVE; - uprobe_perf_print(tu, 0, regs); + if (!is_ret_probe(tu)) + uprobe_perf_print(tu, 0, regs); return 0; } -- cgit v1.2.3 From 4d1298e2124767b4e263498485618b2e91aee5f0 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 30 Mar 2013 19:23:15 +0100 Subject: uprobes/tracing: Make register_uprobe_event() paths uretprobe-friendly Change uprobe_event_define_fields(), and __set_print_fmt() to check is_ret_probe() and use the appropriate format/fields. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Tested-by: Anton Arapov --- kernel/trace/trace_uprobe.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 0ed99a27d122..457576215b3a 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -628,8 +628,14 @@ static int uprobe_event_define_fields(struct ftrace_event_call *event_call) struct uprobe_trace_entry_head field; struct trace_uprobe *tu = event_call->data; - DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_IP, 0); - size = SIZEOF_TRACE_ENTRY(false); + if (is_ret_probe(tu)) { + DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_FUNC, 0); + DEFINE_FIELD(unsigned long, vaddr[1], FIELD_STRING_RETIP, 0); + size = SIZEOF_TRACE_ENTRY(true); + } else { + DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_IP, 0); + size = SIZEOF_TRACE_ENTRY(false); + } /* Set argument names as fields */ for (i = 0; i < tu->nr_args; i++) { ret = trace_define_field(event_call, tu->args[i].type->fmttype, @@ -652,8 +658,13 @@ static int __set_print_fmt(struct trace_uprobe *tu, char *buf, int len) int i; int pos = 0; - fmt = "(%lx)"; - arg = "REC->" FIELD_STRING_IP; + if (is_ret_probe(tu)) { + fmt = "(%lx <- %lx)"; + arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP; + } else { + fmt = "(%lx)"; + arg = "REC->" FIELD_STRING_IP; + } /* When len=0, we just calculate the needed length */ -- cgit v1.2.3 From 3ede82dd3e3deb23429f2bf44fb600f440eef84b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 30 Mar 2013 19:48:09 +0100 Subject: uprobes/tracing: Make seq_printf() code uretprobe-friendly Change probes_seq_show() and print_uprobe_event() to check is_ret_probe() and print the correct data. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Tested-by: Anton Arapov --- kernel/trace/trace_uprobe.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 457576215b3a..8c9f4894bcc0 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -435,9 +435,10 @@ static void probes_seq_stop(struct seq_file *m, void *v) static int probes_seq_show(struct seq_file *m, void *v) { struct trace_uprobe *tu = v; + char c = is_ret_probe(tu) ? 'r' : 'p'; int i; - seq_printf(m, "p:%s/%s", tu->call.class->system, tu->call.name); + seq_printf(m, "%c:%s/%s", c, tu->call.class->system, tu->call.name); seq_printf(m, " %s:0x%p", tu->filename, (void *)tu->offset); for (i = 0; i < tu->nr_args; i++) @@ -566,10 +567,18 @@ print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *e entry = (struct uprobe_trace_entry_head *)iter->ent; tu = container_of(event, struct trace_uprobe, call.event); - if (!trace_seq_printf(s, "%s: (0x%lx)", tu->call.name, entry->vaddr[0])) - goto partial; + if (is_ret_probe(tu)) { + if (!trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)", tu->call.name, + entry->vaddr[1], entry->vaddr[0])) + goto partial; + data = DATAOF_TRACE_ENTRY(entry, true); + } else { + if (!trace_seq_printf(s, "%s: (0x%lx)", tu->call.name, + entry->vaddr[0])) + goto partial; + data = DATAOF_TRACE_ENTRY(entry, false); + } - data = DATAOF_TRACE_ENTRY(entry, false); for (i = 0; i < tu->nr_args; i++) { if (!tu->args[i].type->print(s, tu->args[i].name, data + tu->args[i].offset, entry)) -- cgit v1.2.3 From 4ee5a52ed6301d0afa56cc995ef2c3795f45e801 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 30 Mar 2013 20:28:15 +0100 Subject: uprobes/tracing: Change create_trace_uprobe() to support uretprobes Finally change create_trace_uprobe() to check if argv[0][0] == 'r' and pass the correct "is_ret" to alloc_trace_uprobe(). Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Tested-by: Anton Arapov --- kernel/trace/trace_uprobe.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 8c9f4894bcc0..2d08bea638a4 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -201,7 +201,7 @@ end: /* * Argument syntax: - * - Add uprobe: p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] + * - Add uprobe: p|r[:[GRP/]EVENT] PATH:SYMBOL [FETCHARGS] * * - Remove uprobe: -:[GRP/]EVENT */ @@ -213,20 +213,23 @@ static int create_trace_uprobe(int argc, char **argv) char buf[MAX_EVENT_NAME_LEN]; struct path path; unsigned long offset; - bool is_delete; + bool is_delete, is_return; int i, ret; inode = NULL; ret = 0; is_delete = false; + is_return = false; event = NULL; group = NULL; /* argc must be >= 1 */ if (argv[0][0] == '-') is_delete = true; + else if (argv[0][0] == 'r') + is_return = true; else if (argv[0][0] != 'p') { - pr_info("Probe definition must be started with 'p' or '-'.\n"); + pr_info("Probe definition must be started with 'p', 'r' or '-'.\n"); return -EINVAL; } @@ -324,7 +327,7 @@ static int create_trace_uprobe(int argc, char **argv) kfree(tail); } - tu = alloc_trace_uprobe(group, event, argc, false); + tu = alloc_trace_uprobe(group, event, argc, is_return); if (IS_ERR(tu)) { pr_info("Failed to allocate trace_uprobe.(%d)\n", (int)PTR_ERR(tu)); ret = PTR_ERR(tu); -- cgit v1.2.3 From 32520b2c695b23221751eb09360a6a3dd3105b52 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 10 Apr 2013 16:25:49 +0200 Subject: uprobes/tracing: Don't pass addr=ip to perf_trace_buf_submit() uprobe_perf_print() passes addr=ip to perf_trace_buf_submit() for no reason. This sets perf_sample_data->addr for PERF_SAMPLE_ADDR, we already have perf_sample_data->ip initialized if PERF_SAMPLE_IP. Signed-off-by: Oleg Nesterov Acked-by: Masami Hiramatsu Acked-by: Srikar Dronamraju --- kernel/trace/trace_uprobe.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 2d08bea638a4..37ccb72f0f3b 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -811,7 +811,6 @@ static void uprobe_perf_print(struct trace_uprobe *tu, struct ftrace_event_call *call = &tu->call; struct uprobe_trace_entry_head *entry; struct hlist_head *head; - unsigned long ip; void *data; int size, rctx, i; @@ -825,13 +824,12 @@ static void uprobe_perf_print(struct trace_uprobe *tu, if (!entry) goto out; - ip = instruction_pointer(regs); if (is_ret_probe(tu)) { entry->vaddr[0] = func; - entry->vaddr[1] = ip; + entry->vaddr[1] = instruction_pointer(regs); data = DATAOF_TRACE_ENTRY(entry, true); } else { - entry->vaddr[0] = ip; + entry->vaddr[0] = instruction_pointer(regs); data = DATAOF_TRACE_ENTRY(entry, false); } @@ -839,7 +837,7 @@ static void uprobe_perf_print(struct trace_uprobe *tu, call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); head = this_cpu_ptr(call->perf_events); - perf_trace_buf_submit(entry, size, rctx, ip, 1, regs, head, NULL); + perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); out: preempt_enable(); } -- cgit v1.2.3 From 515619f209114697fabd21eed1623bfa69746815 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 13 Apr 2013 15:36:49 +0200 Subject: uprobes/perf: Avoid perf_trace_buf_prepare/submit if ->perf_events is empty perf_trace_buf_prepare() + perf_trace_buf_submit() make no sense if this task/CPU has no active counters. Change uprobe_perf_print() to return if hlist_empty(call->perf_events). Note: this is not uprobe-specific, we can change other users too. Signed-off-by: Oleg Nesterov --- kernel/trace/trace_uprobe.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 37ccb72f0f3b..32494fb0ee64 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -820,6 +820,10 @@ static void uprobe_perf_print(struct trace_uprobe *tu, return; preempt_disable(); + head = this_cpu_ptr(call->perf_events); + if (hlist_empty(head)) + goto out; + entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); if (!entry) goto out; @@ -836,7 +840,6 @@ static void uprobe_perf_print(struct trace_uprobe *tu, for (i = 0; i < tu->nr_args; i++) call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); - head = this_cpu_ptr(call->perf_events); perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); out: preempt_enable(); -- cgit v1.2.3