From bbe1a59b3a3d4af3869647d294618a23f6c9c6a4 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 22 Jan 2007 20:40:33 -0800 Subject: [PATCH] fix "kvm: add vm exit profiling" export profile_hits() on !SMP too. Cc: Ingo Molnar Cc: Avi Kivity Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/profile.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/profile.c b/kernel/profile.c index a6574a18514e..d6579d511069 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -331,7 +331,6 @@ out: local_irq_restore(flags); put_cpu(); } -EXPORT_SYMBOL_GPL(profile_hits); static int __devinit profile_cpu_callback(struct notifier_block *info, unsigned long action, void *__cpu) @@ -401,6 +400,8 @@ void profile_hits(int type, void *__pc, unsigned int nr_hits) } #endif /* !CONFIG_SMP */ +EXPORT_SYMBOL_GPL(profile_hits); + void profile_tick(int type) { struct pt_regs *regs = get_irq_regs(); -- cgit v1.2.3 From 1b5180b65122666a36a1a232b7b9b38b21a9dcdd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 23 Jan 2007 10:45:50 +0100 Subject: [PATCH] notifiers: fix blocking_notifier_call_chain() scalability while lock-profiling the -rt kernel i noticed weird contention during mmap-intense workloads, and the tracer showed the following gem, in one of our MM hotpaths: threaded-2771 1.... 65us : sys_munmap (sysenter_do_call) threaded-2771 1.... 66us : profile_munmap (sys_munmap) threaded-2771 1.... 66us : blocking_notifier_call_chain (profile_munmap) threaded-2771 1.... 66us : rt_down_read (blocking_notifier_call_chain) ouch! a global rw-semaphore taken in one of the most performance- sensitive codepaths of the kernel. And i dont even have oprofile enabled! All distro kernels have CONFIG_PROFILING enabled, so this scalability problem affects the majority of Linux users. The fix is to enhance blocking_notifier_call_chain() to only take the lock if there appears to be work on the call-chain. With this patch applied i get nicely saturated system, and much higher munmap performance, on SMP systems. And as a bonus this also fixes a similar scalability bottleneck in the thread-exit codepath: profile_task_exit() ... Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Acked-by: Nick Piggin Signed-off-by: Linus Torvalds --- kernel/sys.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/sys.c b/kernel/sys.c index c7675c1bfdf2..6e2101dec0fc 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -323,11 +323,18 @@ EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister); int blocking_notifier_call_chain(struct blocking_notifier_head *nh, unsigned long val, void *v) { - int ret; + int ret = NOTIFY_DONE; - down_read(&nh->rwsem); - ret = notifier_call_chain(&nh->head, val, v); - up_read(&nh->rwsem); + /* + * We check the head outside the lock, but if this access is + * racy then it does not matter what the result of the test + * is, we re-check the list after having taken the lock anyway: + */ + if (rcu_dereference(nh->head)) { + down_read(&nh->rwsem); + ret = notifier_call_chain(&nh->head, val, v); + up_read(&nh->rwsem); + } return ret; } -- cgit v1.2.3 From 8528b0f1de1101c6002036fd53638fb21111d0ea Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 23 Jan 2007 14:16:31 -0800 Subject: Clear spurious irq stat information when adding irq handler Any newly added irq handler may obviously make any old spurious irq status invalid, since the new handler may well be the thing that is supposed to handle any interrupts that came in. So just clear the statistics when adding handlers. Pointed-out-by: Alan Cox Acked-by: Thomas Gleixner Acked-by: Ingo Molnar Signed-off-by: Linus Torvalds --- kernel/irq/manage.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel') diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index b385878c6e80..8b961adc3bd2 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -315,6 +315,9 @@ int setup_irq(unsigned int irq, struct irqaction *new) /* Undo nested disables: */ desc->depth = 1; } + /* Reset broken irq detection when installing new handler */ + desc->irq_count = 0; + desc->irqs_unhandled = 0; spin_unlock_irqrestore(&desc->lock, flags); new->irq = irq; -- cgit v1.2.3 From 7a238fcba0629b6f2edbcd37458bae56fcf36be5 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Mon, 29 Jan 2007 13:19:40 -0800 Subject: [PATCH] namespaces: fix exit race by splitting exit Fix exit race by splitting the nsproxy putting into two pieces. First piece reduces the nsproxy refcount. If we dropped the last reference, then it puts the mnt_ns, and returns the nsproxy as a hint to the caller. Else it returns NULL. The second piece of exiting task namespaces sets tsk->nsproxy to NULL, and drops the references to other namespaces and frees the nsproxy only if an nsproxy was passed in. A little awkward and should probably be reworked, but hopefully it fixes the NFS oops. Signed-off-by: Serge E. Hallyn Cc: Herbert Poetzl Cc: Oleg Nesterov Cc: "Eric W. Biederman" Cc: Cedric Le Goater Cc: Daniel Hokka Zakrisson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nsproxy.h | 30 +++++++++++++++++++----------- kernel/exit.c | 6 ++++-- kernel/fork.c | 4 ++-- kernel/nsproxy.c | 16 +++++++++++++++- 4 files changed, 40 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index 0b9f0dc30d61..678e1d38effb 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -35,22 +35,30 @@ struct nsproxy *dup_namespaces(struct nsproxy *orig); int copy_namespaces(int flags, struct task_struct *tsk); void get_task_namespaces(struct task_struct *tsk); void free_nsproxy(struct nsproxy *ns); +struct nsproxy *put_nsproxy(struct nsproxy *ns); -static inline void put_nsproxy(struct nsproxy *ns) +static inline void finalize_put_nsproxy(struct nsproxy *ns) { - if (atomic_dec_and_test(&ns->count)) { + if (ns) free_nsproxy(ns); - } } -static inline void exit_task_namespaces(struct task_struct *p) +static inline void put_and_finalize_nsproxy(struct nsproxy *ns) { - struct nsproxy *ns = p->nsproxy; - if (ns) { - task_lock(p); - p->nsproxy = NULL; - task_unlock(p); - put_nsproxy(ns); - } + finalize_put_nsproxy(put_nsproxy(ns)); +} + +static inline struct nsproxy *preexit_task_namespaces(struct task_struct *p) +{ + return put_nsproxy(p->nsproxy); +} + +static inline void exit_task_namespaces(struct task_struct *p, + struct nsproxy *ns) +{ + task_lock(p); + p->nsproxy = NULL; + task_unlock(p); + finalize_put_nsproxy(ns); } #endif diff --git a/kernel/exit.c b/kernel/exit.c index 35401720635b..a5bf5329ff97 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -396,7 +396,7 @@ void daemonize(const char *name, ...) current->fs = fs; atomic_inc(&fs->count); - exit_task_namespaces(current); + put_and_finalize_nsproxy(current->nsproxy); current->nsproxy = init_task.nsproxy; get_task_namespaces(current); @@ -853,6 +853,7 @@ static void exit_notify(struct task_struct *tsk) fastcall NORET_TYPE void do_exit(long code) { struct task_struct *tsk = current; + struct nsproxy *ns; int group_dead; profile_task_exit(tsk); @@ -938,8 +939,9 @@ fastcall NORET_TYPE void do_exit(long code) tsk->exit_code = code; proc_exit_connector(tsk); + ns = preexit_task_namespaces(tsk); exit_notify(tsk); - exit_task_namespaces(tsk); + exit_task_namespaces(tsk, ns); #ifdef CONFIG_NUMA mpol_free(tsk->mempolicy); tsk->mempolicy = NULL; diff --git a/kernel/fork.c b/kernel/fork.c index fc723e595cd5..4cf868458f06 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1265,7 +1265,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, return p; bad_fork_cleanup_namespaces: - exit_task_namespaces(p); + put_and_finalize_nsproxy(p->nsproxy); bad_fork_cleanup_keys: exit_keys(p); bad_fork_cleanup_mm: @@ -1711,7 +1711,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) } if (new_nsproxy) - put_nsproxy(new_nsproxy); + put_and_finalize_nsproxy(new_nsproxy); bad_unshare_cleanup_ipc: if (new_ipc) diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index f5b9ee6f6bbb..7b05bce75cde 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -117,7 +117,7 @@ int copy_namespaces(int flags, struct task_struct *tsk) goto out_pid; out: - put_nsproxy(old_ns); + put_and_finalize_nsproxy(old_ns); return err; out_pid: @@ -135,6 +135,20 @@ out_ns: goto out; } +struct nsproxy *put_nsproxy(struct nsproxy *ns) +{ + if (ns) { + if (atomic_dec_and_test(&ns->count)) { + if (ns->mnt_ns) { + put_mnt_ns(ns->mnt_ns); + ns->mnt_ns = NULL; + } + return ns; + } + } + return NULL; +} + void free_nsproxy(struct nsproxy *ns) { if (ns->mnt_ns) -- cgit v1.2.3 From 444f378b237a0f728f5c4aba752c08d13c209344 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 30 Jan 2007 13:35:18 -0800 Subject: Revert "[PATCH] namespaces: fix exit race by splitting exit" This reverts commit 7a238fcba0629b6f2edbcd37458bae56fcf36be5 in preparation for a better and simpler fix proposed by Eric Biederman (and fixed up by Serge Hallyn) Acked-by: Serge E. Hallyn Signed-off-by: Linus Torvalds --- include/linux/nsproxy.h | 30 +++++++++++------------------- kernel/exit.c | 6 ++---- kernel/fork.c | 4 ++-- kernel/nsproxy.c | 16 +--------------- 4 files changed, 16 insertions(+), 40 deletions(-) (limited to 'kernel') diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index 678e1d38effb..0b9f0dc30d61 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -35,30 +35,22 @@ struct nsproxy *dup_namespaces(struct nsproxy *orig); int copy_namespaces(int flags, struct task_struct *tsk); void get_task_namespaces(struct task_struct *tsk); void free_nsproxy(struct nsproxy *ns); -struct nsproxy *put_nsproxy(struct nsproxy *ns); -static inline void finalize_put_nsproxy(struct nsproxy *ns) +static inline void put_nsproxy(struct nsproxy *ns) { - if (ns) + if (atomic_dec_and_test(&ns->count)) { free_nsproxy(ns); + } } -static inline void put_and_finalize_nsproxy(struct nsproxy *ns) +static inline void exit_task_namespaces(struct task_struct *p) { - finalize_put_nsproxy(put_nsproxy(ns)); -} - -static inline struct nsproxy *preexit_task_namespaces(struct task_struct *p) -{ - return put_nsproxy(p->nsproxy); -} - -static inline void exit_task_namespaces(struct task_struct *p, - struct nsproxy *ns) -{ - task_lock(p); - p->nsproxy = NULL; - task_unlock(p); - finalize_put_nsproxy(ns); + struct nsproxy *ns = p->nsproxy; + if (ns) { + task_lock(p); + p->nsproxy = NULL; + task_unlock(p); + put_nsproxy(ns); + } } #endif diff --git a/kernel/exit.c b/kernel/exit.c index a5bf5329ff97..35401720635b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -396,7 +396,7 @@ void daemonize(const char *name, ...) current->fs = fs; atomic_inc(&fs->count); - put_and_finalize_nsproxy(current->nsproxy); + exit_task_namespaces(current); current->nsproxy = init_task.nsproxy; get_task_namespaces(current); @@ -853,7 +853,6 @@ static void exit_notify(struct task_struct *tsk) fastcall NORET_TYPE void do_exit(long code) { struct task_struct *tsk = current; - struct nsproxy *ns; int group_dead; profile_task_exit(tsk); @@ -939,9 +938,8 @@ fastcall NORET_TYPE void do_exit(long code) tsk->exit_code = code; proc_exit_connector(tsk); - ns = preexit_task_namespaces(tsk); exit_notify(tsk); - exit_task_namespaces(tsk, ns); + exit_task_namespaces(tsk); #ifdef CONFIG_NUMA mpol_free(tsk->mempolicy); tsk->mempolicy = NULL; diff --git a/kernel/fork.c b/kernel/fork.c index 4cf868458f06..fc723e595cd5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1265,7 +1265,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, return p; bad_fork_cleanup_namespaces: - put_and_finalize_nsproxy(p->nsproxy); + exit_task_namespaces(p); bad_fork_cleanup_keys: exit_keys(p); bad_fork_cleanup_mm: @@ -1711,7 +1711,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) } if (new_nsproxy) - put_and_finalize_nsproxy(new_nsproxy); + put_nsproxy(new_nsproxy); bad_unshare_cleanup_ipc: if (new_ipc) diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 7b05bce75cde..f5b9ee6f6bbb 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -117,7 +117,7 @@ int copy_namespaces(int flags, struct task_struct *tsk) goto out_pid; out: - put_and_finalize_nsproxy(old_ns); + put_nsproxy(old_ns); return err; out_pid: @@ -135,20 +135,6 @@ out_ns: goto out; } -struct nsproxy *put_nsproxy(struct nsproxy *ns) -{ - if (ns) { - if (atomic_dec_and_test(&ns->count)) { - if (ns->mnt_ns) { - put_mnt_ns(ns->mnt_ns); - ns->mnt_ns = NULL; - } - return ns; - } - } - return NULL; -} - void free_nsproxy(struct nsproxy *ns) { if (ns->mnt_ns) -- cgit v1.2.3 From 0f2452855d86901ba3766826ccb5606ea4e15ab9 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Tue, 30 Jan 2007 15:28:23 -0600 Subject: [PATCH] namespaces: fix task exit disaster This is based on a patch by Eric W. Biederman, who pointed out that pid namespaces are still fake, and we only have one ever active. So for the time being, we can modify any code which could access tsk->nsproxy->pid_ns during task exit to just use &init_pid_ns instead, and move the exit_task_namespaces call in do_exit() back above exit_notify(), so that an exiting nfs server has a valid tsk->sighand to work with. Long term, pulling pid_ns out of nsproxy might be the cleanest solution. Signed-off-by: Eric W. Biederman [ Eric's patch fixed to take care of free_pid() too ] Signed-off-by: Serge E. Hallyn Signed-off-by: Linus Torvalds --- include/linux/pid_namespace.h | 2 +- kernel/exit.c | 2 +- kernel/pid.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index d2a9d419f01f..2833806d42c6 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -39,7 +39,7 @@ static inline void put_pid_ns(struct pid_namespace *ns) static inline struct task_struct *child_reaper(struct task_struct *tsk) { - return tsk->nsproxy->pid_ns->child_reaper; + return init_pid_ns.child_reaper; } #endif /* _LINUX_PID_NS_H */ diff --git a/kernel/exit.c b/kernel/exit.c index 35401720635b..fec12eb12471 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -938,8 +938,8 @@ fastcall NORET_TYPE void do_exit(long code) tsk->exit_code = code; proc_exit_connector(tsk); - exit_notify(tsk); exit_task_namespaces(tsk); + exit_notify(tsk); #ifdef CONFIG_NUMA mpol_free(tsk->mempolicy); tsk->mempolicy = NULL; diff --git a/kernel/pid.c b/kernel/pid.c index 2efe9d8d367b..78f2aee90f54 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -197,7 +197,7 @@ fastcall void free_pid(struct pid *pid) hlist_del_rcu(&pid->pid_chain); spin_unlock_irqrestore(&pidmap_lock, flags); - free_pidmap(current->nsproxy->pid_ns, pid->nr); + free_pidmap(&init_pid_ns, pid->nr); call_rcu(&pid->rcu, delayed_put_pid); } -- cgit v1.2.3 From ab40c5c6b6861ee71fd97f2611027b01e9ec4da0 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 30 Jan 2007 14:36:06 -0800 Subject: [PATCH] kprobes: replace magic numbers with enum Replace the magic numbers with an enum, and gets rid of a warning on the specific architectures (ex. powerpc) on which the compiler considers 'char' as 'unsigned char'. Signed-off-by: Masami Hiramatsu Cc: Prasanna S Panchamukhi Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kprobes.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 17ec4afb0994..6fcf8dd148d0 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -87,6 +87,12 @@ struct kprobe_insn_page { int ngarbage; }; +enum kprobe_slot_state { + SLOT_CLEAN = 0, + SLOT_DIRTY = 1, + SLOT_USED = 2, +}; + static struct hlist_head kprobe_insn_pages; static int kprobe_garbage_slots; static int collect_garbage_slots(void); @@ -130,8 +136,8 @@ kprobe_opcode_t __kprobes *get_insn_slot(void) if (kip->nused < INSNS_PER_PAGE) { int i; for (i = 0; i < INSNS_PER_PAGE; i++) { - if (!kip->slot_used[i]) { - kip->slot_used[i] = 1; + if (kip->slot_used[i] == SLOT_CLEAN) { + kip->slot_used[i] = SLOT_USED; kip->nused++; return kip->insns + (i * MAX_INSN_SIZE); } @@ -163,8 +169,8 @@ kprobe_opcode_t __kprobes *get_insn_slot(void) } INIT_HLIST_NODE(&kip->hlist); hlist_add_head(&kip->hlist, &kprobe_insn_pages); - memset(kip->slot_used, 0, INSNS_PER_PAGE); - kip->slot_used[0] = 1; + memset(kip->slot_used, SLOT_CLEAN, INSNS_PER_PAGE); + kip->slot_used[0] = SLOT_USED; kip->nused = 1; kip->ngarbage = 0; return kip->insns; @@ -173,7 +179,7 @@ kprobe_opcode_t __kprobes *get_insn_slot(void) /* Return 1 if all garbages are collected, otherwise 0. */ static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx) { - kip->slot_used[idx] = 0; + kip->slot_used[idx] = SLOT_CLEAN; kip->nused--; if (kip->nused == 0) { /* @@ -212,7 +218,7 @@ static int __kprobes collect_garbage_slots(void) continue; kip->ngarbage = 0; /* we will collect all garbages */ for (i = 0; i < INSNS_PER_PAGE; i++) { - if (kip->slot_used[i] == -1 && + if (kip->slot_used[i] == SLOT_DIRTY && collect_one_slot(kip, i)) break; } @@ -232,7 +238,7 @@ void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty) slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) { int i = (slot - kip->insns) / MAX_INSN_SIZE; if (dirty) { - kip->slot_used[i] = -1; + kip->slot_used[i] = SLOT_DIRTY; kip->ngarbage++; } else { collect_one_slot(kip, i); -- cgit v1.2.3