From 3247343118daa73f2b94b7fa565425d1d9f9ac84 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 8 Nov 2013 18:52:21 +0100 Subject: uprobes: Add uprobe_task->dup_xol_work/dup_xol_addr uprobe_task->vaddr is a bit strange. The generic code uses it only to pass the additional argument to arch_uprobe_pre_xol(), and since it is always equal to instruction_pointer() this looks even more strange. And both utask->vaddr and and utask->autask have the same scope, they only have the meaning when the task executes the probed insn out-of-line, so it is safe to reuse both in UTASK_RUNNING state. This all means that logically ->vaddr belongs to arch_uprobe_task and we should probably move it there, arch_uprobe_pre_xol() can record instruction_pointer() itself. OTOH, it is also used by uprobe_copy_process() and dup_xol_work() for another purpose, this doesn't look clean and doesn't allow to move this member into arch_uprobe_task. This patch adds the union with 2 anonymous structs into uprobe_task. The first struct is autask + vaddr, this way we "almost" move vaddr into autask. The second struct has 2 new members for uprobe_copy_process() paths: ->dup_xol_addr which can be used instead ->vaddr, and ->dup_xol_work which can be used to avoid kmalloc() and simplify the code. Note that this union will likely have another member(s), we need something like "private_data_for_handlers" so that the tracing handlers could use it to communicate with call_fetch() methods. Signed-off-by: Oleg Nesterov Reviewed-by: Masami Hiramatsu Acked-by: Srikar Dronamraju --- include/linux/uprobes.h | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 319eae70fe84..2225542624de 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -26,6 +26,7 @@ #include #include +#include struct vm_area_struct; struct mm_struct; @@ -72,14 +73,24 @@ enum uprobe_task_state { */ struct uprobe_task { enum uprobe_task_state state; - struct arch_uprobe_task autask; - struct return_instance *return_instances; - unsigned int depth; - struct uprobe *active_uprobe; + union { + struct { + struct arch_uprobe_task autask; + unsigned long vaddr; + }; + + struct { + struct callback_head dup_xol_work; + unsigned long dup_xol_addr; + }; + }; + struct uprobe *active_uprobe; unsigned long xol_vaddr; - unsigned long vaddr; + + struct return_instance *return_instances; + unsigned int depth; }; /* -- cgit v1.2.3 From c912dae60ae6f659455f239298110adc67a5f3e9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 9 Nov 2013 19:49:39 +0100 Subject: uprobes: Cleanup !CONFIG_UPROBES decls, unexport xol_area 1. Don't include asm/uprobes.h unconditionally, we only need it if CONFIG_UPROBES. 2. Move the definition of "struct xol_area" into uprobes.c. Perhaps we should simply kill struct uprobes_state, it buys nothing. 3. Kill the dummy definition of uprobe_get_swbp_addr(), nobody except handle_swbp() needs it. 4. Purely cosmetic, but move the decl of uprobe_get_swbp_addr() up, close to other __weak helpers. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- include/linux/uprobes.h | 31 ++++--------------------------- kernel/events/uprobes.c | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 2225542624de..e32251e00e62 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -33,10 +33,6 @@ struct mm_struct; struct inode; struct notifier_block; -#ifdef CONFIG_ARCH_SUPPORTS_UPROBES -# include -#endif - #define UPROBE_HANDLER_REMOVE 1 #define UPROBE_HANDLER_MASK 1 @@ -61,6 +57,8 @@ struct uprobe_consumer { }; #ifdef CONFIG_UPROBES +#include + enum uprobe_task_state { UTASK_RUNNING, UTASK_SSTEP, @@ -93,24 +91,7 @@ struct uprobe_task { unsigned int depth; }; -/* - * On a breakpoint hit, thread contests for a slot. It frees the - * slot after singlestep. Currently a fixed number of slots are - * allocated. - */ -struct xol_area { - wait_queue_head_t wq; /* if all slots are busy */ - atomic_t slot_count; /* number of in-use slots */ - unsigned long *bitmap; /* 0 = free slot */ - struct page *page; - - /* - * We keep the vma's vm_start rather than a pointer to the vma - * itself. The probed process or a naughty kernel module could make - * the vma go away, and we must handle that reasonably gracefully. - */ - unsigned long vaddr; /* Page(s) of instruction slots */ -}; +struct xol_area; struct uprobes_state { struct xol_area *xol_area; @@ -120,6 +101,7 @@ extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsign extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); extern bool __weak is_trap_insn(uprobe_opcode_t *insn); +extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t); extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool); @@ -131,7 +113,6 @@ extern void uprobe_end_dup_mmap(void); extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm); extern void uprobe_free_utask(struct task_struct *t); extern void uprobe_copy_process(struct task_struct *t, unsigned long flags); -extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); extern int uprobe_post_sstep_notifier(struct pt_regs *regs); extern int uprobe_pre_sstep_notifier(struct pt_regs *regs); extern void uprobe_notify_resume(struct pt_regs *regs); @@ -187,10 +168,6 @@ static inline bool uprobe_deny_signal(void) { return false; } -static inline unsigned long uprobe_get_swbp_addr(struct pt_regs *regs) -{ - return 0; -} static inline void uprobe_free_utask(struct task_struct *t) { } diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 445962a72498..51a7f535ff96 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -85,6 +85,25 @@ struct return_instance { struct return_instance *next; /* keep as stack */ }; +/* + * On a breakpoint hit, thread contests for a slot. It frees the + * slot after singlestep. Currently a fixed number of slots are + * allocated. + */ +struct xol_area { + wait_queue_head_t wq; /* if all slots are busy */ + atomic_t slot_count; /* number of in-use slots */ + unsigned long *bitmap; /* 0 = free slot */ + struct page *page; + + /* + * We keep the vma's vm_start rather than a pointer to the vma + * itself. The probed process or a naughty kernel module could make + * the vma go away, and we must handle that reasonably gracefully. + */ + unsigned long vaddr; /* Page(s) of instruction slots */ +}; + /* * valid_vma: Verify if the specified vma is an executable vma * Relax restrictions while unregistering: vm_flags might have -- cgit v1.2.3 From 71ad88efebbcde374bddf904b96f3a7fc82d45d4 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 12 Nov 2013 17:58:48 +0100 Subject: perf: Add active_entry list head to struct perf_event This patch adds a new field to the struct perf_event. It is intended to be used to chain events which are active (enabled). It helps in the hardware layer for PMUs which do not have actual counter restrictions, i.e., free running read-only counters. Active events are chained as opposed to being tracked via the counter they use. To save space we use a union with hlist_entry as both are mutually exclusive (suggested by Jiri Olsa). Signed-off-by: Stephane Eranian Reviewed-by: Andi Kleen Signed-off-by: Peter Zijlstra Cc: acme@redhat.com Cc: jolsa@redhat.com Cc: zheng.z.yan@intel.com Cc: bp@alien8.de Cc: maria.n.dimakopoulou@gmail.com Link: http://lkml.kernel.org/r/1384275531-10892-2-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 5 ++++- kernel/events/core.c | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2e069d1288df..8f4a70f2eca8 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -319,7 +319,10 @@ struct perf_event { */ struct list_head migrate_entry; - struct hlist_node hlist_entry; + union { + struct hlist_node hlist_entry; + struct list_head active_entry; + }; int nr_siblings; int group_flags; struct perf_event *group_leader; diff --git a/kernel/events/core.c b/kernel/events/core.c index 72348dc192c1..403b781daafb 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6655,6 +6655,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, INIT_LIST_HEAD(&event->event_entry); INIT_LIST_HEAD(&event->sibling_list); INIT_LIST_HEAD(&event->rb_entry); + INIT_LIST_HEAD(&event->active_entry); init_waitqueue_head(&event->waitq); init_irq_work(&event->pending, perf_pending_event); -- cgit v1.2.3 From f3ae75de98c4bac145a87d830c156c96f9414022 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Wed, 8 Jan 2014 11:15:52 +0100 Subject: perf/x86: Fix active_entry initialization This patch fixes a problem with the initialization of the struct perf_event active_entry field. It is defined inside an anonymous union and was initialized in perf_event_alloc() using INIT_LIST_HEAD(). However at that time, we do not know whether the event is going to use active_entry or hlist_entry (SW). Or at last, we don't want to make that determination there. The problem is that hlist and list_head are not initialized the same way. One is okay with NULL (from kzmalloc), the other needs to pointers to point to self. This patch resolves this problem by dropping the union. This will avoid problems later on, if someone starts using active_entry or hlist_entry without verifying that they actually overlap. This also solves the initialization problem. Signed-off-by: Stephane Eranian Cc: ak@linux.intel.com Cc: acme@redhat.com Cc: jolsa@redhat.com Cc: zheng.z.yan@intel.com Cc: bp@alien8.de Cc: vincent.weaver@maine.edu Cc: maria.n.dimakopoulou@gmail.com Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1389176153-3128-2-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 6 ++---- kernel/events/core.c | 2 ++ 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8f4a70f2eca8..e56b07f5c9b6 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -319,10 +319,8 @@ struct perf_event { */ struct list_head migrate_entry; - union { - struct hlist_node hlist_entry; - struct list_head active_entry; - }; + struct hlist_node hlist_entry; + struct list_head active_entry; int nr_siblings; int group_flags; struct perf_event *group_leader; diff --git a/kernel/events/core.c b/kernel/events/core.c index 89d34f9bb8cb..c3b6c2799f34 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6670,6 +6670,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, INIT_LIST_HEAD(&event->sibling_list); INIT_LIST_HEAD(&event->rb_entry); INIT_LIST_HEAD(&event->active_entry); + INIT_HLIST_NODE(&event->hlist_entry); + init_waitqueue_head(&event->waitq); init_irq_work(&event->pending, perf_pending_event); -- cgit v1.2.3 From a21b0b354d4ac39be691f51c53562e2c24443d9e Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Sun, 5 Jan 2014 21:36:33 +0100 Subject: perf: Introduce a flag to enable close-on-exec in perf_event_open() Unlike recent modern userspace API such as: epoll_create1 (EPOLL_CLOEXEC), eventfd (EFD_CLOEXEC), fanotify_init (FAN_CLOEXEC), inotify_init1 (IN_CLOEXEC), signalfd (SFD_CLOEXEC), timerfd_create (TFD_CLOEXEC), or the venerable general purpose open (O_CLOEXEC), perf_event_open() syscall lack a flag to atomically set FD_CLOEXEC (eg. close-on-exec) flag on file descriptor it returns to userspace. The present patch adds a PERF_FLAG_FD_CLOEXEC flag to allow perf_event_open() syscall to atomically set close-on-exec. Having this flag will enable userspace to remove the file descriptor from the list of file descriptors being inherited across exec, without the need to call fcntl(fd, F_SETFD, FD_CLOEXEC) and the associated race condition between the current thread and another thread calling fork(2) then execve(2). Links: - Secure File Descriptor Handling (Ulrich Drepper, 2008) http://udrepper.livejournal.com/20407.html - Excuse me son, but your code is leaking !!! (Dan Walsh, March 2012) http://danwalsh.livejournal.com/53603.html - Notes in DMA buffer sharing: leak and security hole http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/dma-buf-sharing.txt?id=v3.13-rc3#n428 Signed-off-by: Yann Droneaud Cc: Arnaldo Carvalho de Melo Cc: Al Viro Cc: Andrew Morton Cc: Paul Mackerras Cc: Linus Torvalds Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/8c03f54e1598b1727c19706f3af03f98685d9fe6.1388952061.git.ydroneaud@opteya.com Signed-off-by: Ingo Molnar --- include/uapi/linux/perf_event.h | 1 + kernel/events/core.c | 12 +++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index e1802d6153ae..ca018b4085c6 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -724,6 +724,7 @@ enum perf_callchain_context { #define PERF_FLAG_FD_NO_GROUP (1U << 0) #define PERF_FLAG_FD_OUTPUT (1U << 1) #define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */ +#define PERF_FLAG_FD_CLOEXEC (1U << 3) /* O_CLOEXEC */ union perf_mem_data_src { __u64 val; diff --git a/kernel/events/core.c b/kernel/events/core.c index c3b6c2799f34..5c8726473006 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -119,7 +119,8 @@ static int cpu_function_call(int cpu, int (*func) (void *info), void *info) #define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\ PERF_FLAG_FD_OUTPUT |\ - PERF_FLAG_PID_CGROUP) + PERF_FLAG_PID_CGROUP |\ + PERF_FLAG_FD_CLOEXEC) /* * branch priv levels that need permission checks @@ -6982,6 +6983,7 @@ SYSCALL_DEFINE5(perf_event_open, int event_fd; int move_group = 0; int err; + int f_flags = O_RDWR; /* for future expandability... */ if (flags & ~PERF_FLAG_ALL) @@ -7010,7 +7012,10 @@ SYSCALL_DEFINE5(perf_event_open, if ((flags & PERF_FLAG_PID_CGROUP) && (pid == -1 || cpu == -1)) return -EINVAL; - event_fd = get_unused_fd(); + if (flags & PERF_FLAG_FD_CLOEXEC) + f_flags |= O_CLOEXEC; + + event_fd = get_unused_fd_flags(f_flags); if (event_fd < 0) return event_fd; @@ -7132,7 +7137,8 @@ SYSCALL_DEFINE5(perf_event_open, goto err_context; } - event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR); + event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, + f_flags); if (IS_ERR(event_file)) { err = PTR_ERR(event_file); goto err_context; -- cgit v1.2.3