From 3b87487ac5008072f138953b07505a7e3493327f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 30 Dec 2011 13:24:40 -0800 Subject: Revert "clockevents: Set noop handler in clockevents_exchange_device()" This reverts commit de28f25e8244c7353abed8de0c7792f5f883588c. It results in resume problems for various people. See for example http://thread.gmane.org/gmane.linux.kernel/1233033 http://thread.gmane.org/gmane.linux.kernel/1233389 http://thread.gmane.org/gmane.linux.kernel/1233159 http://thread.gmane.org/gmane.linux.kernel/1227868/focus=1230877 and the fedora and ubuntu bug reports https://bugzilla.redhat.com/show_bug.cgi?id=767248 https://bugs.launchpad.net/ubuntu/+source/linux/+bug/904569 which got bisected down to the stable version of this commit. Reported-by: Jonathan Nieder Reported-by: Phil Miller Reported-by: Philip Langdale Reported-by: Tim Gardner Cc: Thomas Gleixner Cc: Greg KH Cc: stable@kernel.org # for stable kernels that applied the original Signed-off-by: Linus Torvalds --- kernel/time/clockevents.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index c4eb71c8b2ea..1ecd6ba36d6c 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -387,7 +387,6 @@ void clockevents_exchange_device(struct clock_event_device *old, * released list and do a notify add later. */ if (old) { - old->event_handler = clockevents_handle_noop; clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED); list_del(&old->list); list_add(&old->list, &clockevents_released); -- cgit v1.2.3 From e6780f7243eddb133cc20ec37fa69317c218b709 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 31 Dec 2011 11:44:01 -0800 Subject: futex: Fix uninterruptible loop due to gate_area It was found (by Sasha) that if you use a futex located in the gate area we get stuck in an uninterruptible infinite loop, much like the ZERO_PAGE issue. While looking at this problem, PeterZ realized you'll get into similar trouble when hitting any install_special_pages() mapping. And are there still drivers setting up their own special mmaps without page->mapping, and without special VM or pte flags to make get_user_pages fail? In most cases, if page->mapping is NULL, we do not need to retry at all: Linus points out that even /proc/sys/vm/drop_caches poses no problem, because it ends up using remove_mapping(), which takes care not to interfere when the page reference count is raised. But there is still one case which does need a retry: if memory pressure called shmem_writepage in between get_user_pages_fast dropping page table lock and our acquiring page lock, then the page gets switched from filecache to swapcache (and ->mapping set to NULL) whatever the refcount. Fault it back in to get the page->mapping needed for key->shared.inode. Reported-by: Sasha Levin Signed-off-by: Hugh Dickins Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- kernel/futex.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/futex.c b/kernel/futex.c index ea87f4d2f455..1614be20173d 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -314,17 +314,29 @@ again: #endif lock_page(page_head); + + /* + * If page_head->mapping is NULL, then it cannot be a PageAnon + * page; but it might be the ZERO_PAGE or in the gate area or + * in a special mapping (all cases which we are happy to fail); + * or it may have been a good file page when get_user_pages_fast + * found it, but truncated or holepunched or subjected to + * invalidate_complete_page2 before we got the page lock (also + * cases which we are happy to fail). And we hold a reference, + * so refcount care in invalidate_complete_page's remove_mapping + * prevents drop_caches from setting mapping to NULL beneath us. + * + * The case we do have to guard against is when memory pressure made + * shmem_writepage move it from filecache to swapcache beneath us: + * an unlikely race, but we do need to retry for page_head->mapping. + */ if (!page_head->mapping) { + int shmem_swizzled = PageSwapCache(page_head); unlock_page(page_head); put_page(page_head); - /* - * ZERO_PAGE pages don't have a mapping. Avoid a busy loop - * trying to find one. RW mapping would have COW'd (and thus - * have a mapping) so this page is RO and won't ever change. - */ - if ((page_head == ZERO_PAGE(address))) - return -EFAULT; - goto again; + if (shmem_swizzled) + goto again; + return -EFAULT; } /* -- cgit v1.2.3 From f9fab10bbd768b0e5254e53a4a8477a94bfc4b96 Mon Sep 17 00:00:00 2001 From: Mandeep Singh Baines Date: Tue, 3 Jan 2012 14:41:13 -0800 Subject: hung_task: fix false positive during vfork vfork parent uninterruptibly and unkillably waits for its child to exec/exit. This wait is of unbounded length. Ignore such waits in the hung_task detector. Signed-off-by: Mandeep Singh Baines Reported-by: Sasha Levin LKML-Reference: <1325344394.28904.43.camel@lappy> Cc: Linus Torvalds Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Andrew Morton Cc: John Kacur Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- kernel/hung_task.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 8b1748d0172c..2e48ec0c2e91 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -74,11 +74,17 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) /* * Ensure the task is not frozen. - * Also, when a freshly created task is scheduled once, changes - * its state to TASK_UNINTERRUPTIBLE without having ever been - * switched out once, it musn't be checked. + * Also, skip vfork and any other user process that freezer should skip. */ - if (unlikely(t->flags & PF_FROZEN || !switch_count)) + if (unlikely(t->flags & (PF_FROZEN | PF_FREEZER_SKIP))) + return; + + /* + * When a freshly created task is scheduled once, changes its state to + * TASK_UNINTERRUPTIBLE without having ever been switched out once, it + * musn't be checked. + */ + if (unlikely(!switch_count)) return; if (switch_count != t->last_switch_count) { -- cgit v1.2.3 From 50b8d257486a45cba7b65ca978986ed216bbcc10 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 4 Jan 2012 17:29:02 +0100 Subject: ptrace: partially fix the do_wait(WEXITED) vs EXIT_DEAD->EXIT_ZOMBIE race Test-case: int main(void) { int pid, status; pid = fork(); if (!pid) { for (;;) { if (!fork()) return 0; if (waitpid(-1, &status, 0) < 0) { printf("ERR!! wait: %m\n"); return 0; } } } assert(ptrace(PTRACE_ATTACH, pid, 0,0) == 0); assert(waitpid(-1, NULL, 0) == pid); assert(ptrace(PTRACE_SETOPTIONS, pid, 0, PTRACE_O_TRACEFORK) == 0); do { ptrace(PTRACE_CONT, pid, 0, 0); pid = waitpid(-1, NULL, 0); } while (pid > 0); return 1; } It fails because ->real_parent sees its child in EXIT_DEAD state while the tracer is going to change the state back to EXIT_ZOMBIE in wait_task_zombie(). The offending commit is 823b018e which moved the EXIT_DEAD check, but in fact we should not blame it. The original code was not correct as well because it didn't take ptrace_reparented() into account and because we can't really trust ->ptrace. This patch adds the additional check to close this particular race but it doesn't solve the whole problem. We simply can't rely on ->ptrace in this case, it can be cleared if the tracer is multithreaded by the exiting ->parent. I think we should kill EXIT_DEAD altogether, we should always remove the soon-to-be-reaped child from ->children or at least we should never do the DEAD->ZOMBIE transition. But this is too complex for 3.2. Reported-and-tested-by: Denys Vlasenko Tested-by: Lukasz Michalik Acked-by: Tejun Heo Cc: [3.0+] Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- kernel/exit.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index d0b7d988f873..e6e01b959a0e 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1540,8 +1540,15 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace, } /* dead body doesn't have much to contribute */ - if (p->exit_state == EXIT_DEAD) + if (unlikely(p->exit_state == EXIT_DEAD)) { + /* + * But do not ignore this task until the tracer does + * wait_task_zombie()->do_notify_parent(). + */ + if (likely(!ptrace) && unlikely(ptrace_reparented(p))) + wo->notask_error = 0; return 0; + } /* slay zombie? */ if (p->exit_state == EXIT_ZOMBIE) { -- cgit v1.2.3 From 8a88951b5878dc475dcd841cefc767e36397d14e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 4 Jan 2012 17:29:20 +0100 Subject: ptrace: ensure JOBCTL_STOP_SIGMASK is not zero after detach This is the temporary simple fix for 3.2, we need more changes in this area. 1. do_signal_stop() assumes that the running untraced thread in the stopped thread group is not possible. This was our goal but it is not yet achieved: a stopped-but-resumed tracee can clone the running thread which can initiate another group-stop. Remove WARN_ON_ONCE(!current->ptrace). 2. A new thread always starts with ->jobctl = 0. If it is auto-attached and this group is stopped, __ptrace_unlink() sets JOBCTL_STOP_PENDING but JOBCTL_STOP_SIGMASK part is zero, this triggers WANR_ON(!signr) in do_jobctl_trap() if another debugger attaches. Change __ptrace_unlink() to set the artificial SIGSTOP for report. Alternatively we could change ptrace_init_task() to copy signr from current, but this means we can copy it for no reason and hide the possible similar problems. Acked-by: Tejun Heo Cc: [3.1] Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- kernel/ptrace.c | 13 ++++++++++++- kernel/signal.c | 2 -- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 24d04477b257..78ab24a7b0e4 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -96,9 +96,20 @@ void __ptrace_unlink(struct task_struct *child) */ if (!(child->flags & PF_EXITING) && (child->signal->flags & SIGNAL_STOP_STOPPED || - child->signal->group_stop_count)) + child->signal->group_stop_count)) { child->jobctl |= JOBCTL_STOP_PENDING; + /* + * This is only possible if this thread was cloned by the + * traced task running in the stopped group, set the signal + * for the future reports. + * FIXME: we should change ptrace_init_task() to handle this + * case. + */ + if (!(child->jobctl & JOBCTL_STOP_SIGMASK)) + child->jobctl |= SIGSTOP; + } + /* * If transition to TASK_STOPPED is pending or in TASK_TRACED, kick * @child in the butt. Note that @resume should be used iff @child diff --git a/kernel/signal.c b/kernel/signal.c index b3f78d09a105..206551563cce 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1994,8 +1994,6 @@ static bool do_signal_stop(int signr) */ if (!(sig->flags & SIGNAL_STOP_STOPPED)) sig->group_exit_code = signr; - else - WARN_ON_ONCE(!current->ptrace); sig->group_stop_count = 0; -- cgit v1.2.3 From c10076c4304083af15a41f6bc5e657e781c1f9a6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 13 Jan 2012 21:40:59 -0500 Subject: tracepoints/module: Fix disabling tracepoints with taint CRAP or OOT Tracepoints are disabled for tainted modules, which is usually because the module is either proprietary or was forced, and we don't want either of them using kernel tracepoints. But, a module can also be tainted by being in the staging directory or compiled out of tree. Either is fine for use with tracepoints, no need to punish them. I found this out when I noticed that my sample trace event module, when done out of tree, stopped working. Cc: stable@vger.kernel.org # 3.2 Cc: Mathieu Desnoyers Cc: Ben Hutchings Cc: Dave Jones Cc: Greg Kroah-Hartman Cc: Rusty Russell Signed-off-by: Steven Rostedt --- kernel/tracepoint.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index db110b8ae030..f1539decd99d 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -634,10 +634,11 @@ static int tracepoint_module_coming(struct module *mod) int ret = 0; /* - * We skip modules that tain the kernel, especially those with different - * module header (for forced load), to make sure we don't cause a crash. + * We skip modules that taint the kernel, especially those with different + * module headers (for forced load), to make sure we don't cause a crash. + * Staging and out-of-tree GPL modules are fine. */ - if (mod->taints) + if (mod->taints & ~((1 << TAINT_OOT_MODULE) | (1 << TAINT_CRAP))) return 0; mutex_lock(&tracepoints_mutex); tp_mod = kmalloc(sizeof(struct tp_module), GFP_KERNEL); -- cgit v1.2.3