From a3f07114e3359fb98683069ae397220e8992a24a Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Wed, 5 Nov 2008 12:47:09 -0500 Subject: [PATCH] Audit: make audit=0 actually turn off audit Currently audit=0 on the kernel command line does absolutely nothing. Audit always loads and always uses its resources such as creating the kernel netlink socket. This patch causes audit=0 to actually disable audit. Audit will use no resources and starting the userspace auditd daemon will not cause the kernel audit system to activate. Signed-off-by: Eric Paris Signed-off-by: Al Viro --- kernel/audit.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index 4414e93d8750..d8646c23b427 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -61,8 +61,11 @@ #include "audit.h" -/* No auditing will take place until audit_initialized != 0. +/* No auditing will take place until audit_initialized == AUDIT_INITIALIZED. * (Initialization happens after skb_init is called.) */ +#define AUDIT_DISABLED -1 +#define AUDIT_UNINITIALIZED 0 +#define AUDIT_INITIALIZED 1 static int audit_initialized; #define AUDIT_OFF 0 @@ -965,6 +968,9 @@ static int __init audit_init(void) { int i; + if (audit_initialized == AUDIT_DISABLED) + return 0; + printk(KERN_INFO "audit: initializing netlink socket (%s)\n", audit_default ? "enabled" : "disabled"); audit_sock = netlink_kernel_create(&init_net, NETLINK_AUDIT, 0, @@ -976,7 +982,7 @@ static int __init audit_init(void) skb_queue_head_init(&audit_skb_queue); skb_queue_head_init(&audit_skb_hold_queue); - audit_initialized = 1; + audit_initialized = AUDIT_INITIALIZED; audit_enabled = audit_default; audit_ever_enabled |= !!audit_default; @@ -999,13 +1005,21 @@ __initcall(audit_init); static int __init audit_enable(char *str) { audit_default = !!simple_strtol(str, NULL, 0); - printk(KERN_INFO "audit: %s%s\n", - audit_default ? "enabled" : "disabled", - audit_initialized ? "" : " (after initialization)"); - if (audit_initialized) { + if (!audit_default) + audit_initialized = AUDIT_DISABLED; + + printk(KERN_INFO "audit: %s", audit_default ? "enabled" : "disabled"); + + if (audit_initialized == AUDIT_INITIALIZED) { audit_enabled = audit_default; audit_ever_enabled |= !!audit_default; + } else if (audit_initialized == AUDIT_UNINITIALIZED) { + printk(" (after initialization)"); + } else { + printk(" (until reboot)"); } + printk("\n"); + return 1; } @@ -1146,7 +1160,7 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, int reserve; unsigned long timeout_start = jiffies; - if (!audit_initialized) + if (audit_initialized != AUDIT_INITIALIZED) return NULL; if (unlikely(audit_filter_type(type))) -- cgit v1.2.3 From a64e64944f4b8ce3288519555dbaa0232414b8ac Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 12 Nov 2008 18:37:41 -0500 Subject: [PATCH] return records for fork() both to child and parent Signed-off-by: Al Viro --- include/linux/audit.h | 2 ++ kernel/auditsc.c | 17 +++++++++++++++++ kernel/fork.c | 1 + 3 files changed, 20 insertions(+) (limited to 'kernel') diff --git a/include/linux/audit.h b/include/linux/audit.h index 6272a395d43c..1b2a6a5c1876 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -391,6 +391,7 @@ extern int audit_classify_arch(int arch); #ifdef CONFIG_AUDITSYSCALL /* These are defined in auditsc.c */ /* Public API */ +extern void audit_finish_fork(struct task_struct *child); extern int audit_alloc(struct task_struct *task); extern void audit_free(struct task_struct *task); extern void audit_syscall_entry(int arch, @@ -504,6 +505,7 @@ static inline int audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) extern int audit_n_rules; extern int audit_signals; #else +#define audit_finish_fork(t) #define audit_alloc(t) ({ 0; }) #define audit_free(t) do { ; } while (0) #define audit_syscall_entry(ta,a,b,c,d,e) do { ; } while (0) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index cf5bc2f5f9c3..de8468050afa 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1548,6 +1548,23 @@ void audit_syscall_entry(int arch, int major, context->ppid = 0; } +void audit_finish_fork(struct task_struct *child) +{ + struct audit_context *ctx = current->audit_context; + struct audit_context *p = child->audit_context; + if (!p || !ctx || !ctx->auditable) + return; + p->arch = ctx->arch; + p->major = ctx->major; + memcpy(p->argv, ctx->argv, sizeof(ctx->argv)); + p->ctime = ctx->ctime; + p->dummy = ctx->dummy; + p->auditable = ctx->auditable; + p->in_syscall = ctx->in_syscall; + p->filterkey = kstrdup(ctx->filterkey, GFP_KERNEL); + p->ppid = current->pid; +} + /** * audit_syscall_exit - deallocate audit context after a system call * @tsk: task being audited diff --git a/kernel/fork.c b/kernel/fork.c index 2a372a0e206f..8d6a7dd9282b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1398,6 +1398,7 @@ long do_fork(unsigned long clone_flags, init_completion(&vfork); } + audit_finish_fork(p); tracehook_report_clone(trace, regs, clone_flags, nr, p); /* -- cgit v1.2.3 From 7f0ed77d241b60f70136f15b8eef30a3de1fa249 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 1 Dec 2008 14:16:06 -0800 Subject: [patch 1/1] audit: remove excess kernel-doc Delete excess kernel-doc notation in kernel/auditsc.c: Warning(linux-2.6.27-git10//kernel/auditsc.c:1481): Excess function parameter or struct member 'tsk' description in 'audit_syscall_entry' Warning(linux-2.6.27-git10//kernel/auditsc.c:1564): Excess function parameter or struct member 'tsk' description in 'audit_syscall_exit' Signed-off-by: Randy Dunlap Cc: Al Viro Cc: Eric Paris Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- kernel/auditsc.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel') diff --git a/kernel/auditsc.c b/kernel/auditsc.c index de8468050afa..0a13d6895494 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1459,7 +1459,6 @@ void audit_free(struct task_struct *tsk) /** * audit_syscall_entry - fill in an audit record at syscall entry - * @tsk: task being audited * @arch: architecture type * @major: major syscall type (function) * @a1: additional syscall register 1 @@ -1567,7 +1566,6 @@ void audit_finish_fork(struct task_struct *child) /** * audit_syscall_exit - deallocate audit context after a system call - * @tsk: task being audited * @valid: success/failure flag * @return_code: syscall return value * -- cgit v1.2.3 From 48887e63d6e057543067327da6b091297f7fe645 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 6 Dec 2008 01:05:50 -0500 Subject: [PATCH] fix broken timestamps in AVC generated by kernel threads Timestamp in audit_context is valid only if ->in_syscall is set. Signed-off-by: Al Viro --- include/linux/audit.h | 4 ++-- kernel/audit.c | 4 +--- kernel/auditsc.c | 5 ++++- 3 files changed, 7 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/include/linux/audit.h b/include/linux/audit.h index 1b2a6a5c1876..8f0672d13eb1 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -435,7 +435,7 @@ static inline void audit_ptrace(struct task_struct *t) /* Private API (for audit.c only) */ extern unsigned int audit_serial(void); -extern void auditsc_get_stamp(struct audit_context *ctx, +extern int auditsc_get_stamp(struct audit_context *ctx, struct timespec *t, unsigned int *serial); extern int audit_set_loginuid(struct task_struct *task, uid_t loginuid); #define audit_get_loginuid(t) ((t)->loginuid) @@ -518,7 +518,7 @@ extern int audit_signals; #define audit_inode(n,d) do { ; } while (0) #define audit_inode_child(d,i,p) do { ; } while (0) #define audit_core_dumps(i) do { ; } while (0) -#define auditsc_get_stamp(c,t,s) do { BUG(); } while (0) +#define auditsc_get_stamp(c,t,s) (0) #define audit_get_loginuid(t) (-1) #define audit_get_sessionid(t) (-1) #define audit_log_task_context(b) do { ; } while (0) diff --git a/kernel/audit.c b/kernel/audit.c index d8646c23b427..ce6d8ea3131e 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1121,9 +1121,7 @@ unsigned int audit_serial(void) static inline void audit_get_stamp(struct audit_context *ctx, struct timespec *t, unsigned int *serial) { - if (ctx) - auditsc_get_stamp(ctx, t, serial); - else { + if (!ctx || !auditsc_get_stamp(ctx, t, serial)) { *t = CURRENT_TIME; *serial = audit_serial(); } diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 0a13d6895494..2a3f0afc4d2a 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1957,15 +1957,18 @@ EXPORT_SYMBOL_GPL(__audit_inode_child); * * Also sets the context as auditable. */ -void auditsc_get_stamp(struct audit_context *ctx, +int auditsc_get_stamp(struct audit_context *ctx, struct timespec *t, unsigned int *serial) { + if (!ctx->in_syscall) + return 0; if (!ctx->serial) ctx->serial = audit_serial(); t->tv_sec = ctx->ctime.tv_sec; t->tv_nsec = ctx->ctime.tv_nsec; *serial = ctx->serial; ctx->auditable = 1; + return 1; } /* global counter which is incremented every time something logs in */ -- cgit v1.2.3 From 9a2bd244e18ffbb96c8b783210fda4eded7c7e6f Mon Sep 17 00:00:00 2001 From: Brian King Date: Tue, 9 Dec 2008 08:47:00 -0600 Subject: sched: CPU remove deadlock fix Impact: fix possible deadlock in CPU hot-remove path This patch fixes a possible deadlock scenario in the CPU remove path. migration_call grabs rq->lock, then wakes up everything on rq->migration_queue with the lock held. Then one of the tasks on the migration queue ends up calling tg_shares_up which then also tries to acquire the same rq->lock. [c000000058eab2e0] c000000000502078 ._spin_lock_irqsave+0x98/0xf0 [c000000058eab370] c00000000008011c .tg_shares_up+0x10c/0x20c [c000000058eab430] c00000000007867c .walk_tg_tree+0xc4/0xfc [c000000058eab4d0] c0000000000840c8 .try_to_wake_up+0xb0/0x3c4 [c000000058eab590] c0000000000799a0 .__wake_up_common+0x6c/0xe0 [c000000058eab640] c00000000007ada4 .complete+0x54/0x80 [c000000058eab6e0] c000000000509fa8 .migration_call+0x5fc/0x6f8 [c000000058eab7c0] c000000000504074 .notifier_call_chain+0x68/0xe0 [c000000058eab860] c000000000506568 ._cpu_down+0x2b0/0x3f4 [c000000058eaba60] c000000000506750 .cpu_down+0xa4/0x108 [c000000058eabb10] c000000000507e54 .store_online+0x44/0xa8 [c000000058eabba0] c000000000396260 .sysdev_store+0x3c/0x50 [c000000058eabc10] c0000000001a39b8 .sysfs_write_file+0x124/0x18c [c000000058eabcd0] c00000000013061c .vfs_write+0xd0/0x1bc [c000000058eabd70] c0000000001308a4 .sys_write+0x68/0x114 [c000000058eabe30] c0000000000086b4 syscall_exit+0x0/0x40 Signed-off-by: Brian King Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index b7480fb5c3dc..e4bb1dd7b308 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6587,7 +6587,9 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) req = list_entry(rq->migration_queue.next, struct migration_req, list); list_del_init(&req->list); + spin_unlock_irq(&rq->lock); complete(&req->done); + spin_lock_irq(&rq->lock); } spin_unlock_irq(&rq->lock); break; -- cgit v1.2.3 From fbb5b7ae4b442f1923513dc6165a66c7a7f29073 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 9 Dec 2008 13:14:10 -0800 Subject: relayfs: fix infinite loop with splice() Running kmemtraced, which uses splice() on relayfs, causes a hard lock on x86-64 SMP. As described by Tom Zanussi: It looks like you hit the same problem as described here: commit 8191ecd1d14c6914c660dfa007154860a7908857 splice: fix infinite loop in generic_file_splice_read() relay uses the same loop but it never got noticed or fixed. Cc: Mathieu Desnoyers Tested-by: Pekka Enberg Signed-off-by: Tom Zanussi Signed-off-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/relay.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/relay.c b/kernel/relay.c index 32b0befdcb6a..09ac2008f77b 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -1317,12 +1317,9 @@ static ssize_t relay_file_splice_read(struct file *in, if (ret < 0) break; else if (!ret) { - if (spliced) - break; - if (flags & SPLICE_F_NONBLOCK) { + if (flags & SPLICE_F_NONBLOCK) ret = -EAGAIN; - break; - } + break; } *ppos += ret; -- cgit v1.2.3 From 9c24624727f6d6c460e45762a408ca5f5b9b8ef2 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 9 Dec 2008 13:14:27 -0800 Subject: KSYM_SYMBOL_LEN fixes Miles Lane tailing /sys files hit a BUG which Pekka Enberg has tracked to my 966c8c12dc9e77f931e2281ba25d2f0244b06949 sprint_symbol(): use less stack exposing a bug in slub's list_locations() - kallsyms_lookup() writes a 0 to namebuf[KSYM_NAME_LEN-1], but that was beyond the end of page provided. The 100 slop which list_locations() allows at end of page looks roughly enough for all the other stuff it might print after the symbol before it checks again: break out KSYM_SYMBOL_LEN earlier than before. Latencytop and ftrace and are using KSYM_NAME_LEN buffers where they need KSYM_SYMBOL_LEN buffers, and vmallocinfo a 2*KSYM_NAME_LEN buffer where it wants a KSYM_SYMBOL_LEN buffer: fix those before anyone copies them. [akpm@linux-foundation.org: ftrace.h needs module.h] Signed-off-by: Hugh Dickins Cc: Christoph Lameter Cc Miles Lane Acked-by: Pekka Enberg Acked-by: Steven Rostedt Acked-by: Frederic Weisbecker Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 2 +- include/linux/ftrace.h | 3 ++- kernel/latencytop.c | 2 +- mm/slub.c | 2 +- mm/vmalloc.c | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/fs/proc/base.c b/fs/proc/base.c index 486cf3fe7139..d4677603c889 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -371,7 +371,7 @@ static int lstats_show_proc(struct seq_file *m, void *v) task->latency_record[i].time, task->latency_record[i].max); for (q = 0; q < LT_BACKTRACEDEPTH; q++) { - char sym[KSYM_NAME_LEN]; + char sym[KSYM_SYMBOL_LEN]; char *c; if (!task->latency_record[i].backtrace[q]) break; diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 703eb53cfa2b..9c5bc6be2b09 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #ifdef CONFIG_FUNCTION_TRACER @@ -231,7 +232,7 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { } struct boot_trace { pid_t caller; - char func[KSYM_NAME_LEN]; + char func[KSYM_SYMBOL_LEN]; int result; unsigned long long duration; /* usecs */ ktime_t calltime; diff --git a/kernel/latencytop.c b/kernel/latencytop.c index 5e7b45c56923..449db466bdbc 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c @@ -191,7 +191,7 @@ static int lstats_show(struct seq_file *m, void *v) latency_record[i].time, latency_record[i].max); for (q = 0; q < LT_BACKTRACEDEPTH; q++) { - char sym[KSYM_NAME_LEN]; + char sym[KSYM_SYMBOL_LEN]; char *c; if (!latency_record[i].backtrace[q]) break; diff --git a/mm/slub.c b/mm/slub.c index 749588a50a5a..a2cd47d89e0a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3597,7 +3597,7 @@ static int list_locations(struct kmem_cache *s, char *buf, for (i = 0; i < t.count; i++) { struct location *l = &t.loc[i]; - if (len > PAGE_SIZE - 100) + if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100) break; len += sprintf(buf + len, "%7ld ", l->count); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index f3f6e0758562..1ddb77ba3995 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1717,7 +1717,7 @@ static int s_show(struct seq_file *m, void *p) v->addr, v->addr + v->size, v->size); if (v->caller) { - char buff[2 * KSYM_NAME_LEN]; + char buff[KSYM_SYMBOL_LEN]; seq_putc(m, ' '); sprint_symbol(buff, (unsigned long)v->caller); -- cgit v1.2.3 From b88ed20594db2c685555b68c52b693b75738b2f5 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 10 Dec 2008 20:48:52 +0000 Subject: fix mapping_writably_mapped() Lee Schermerhorn noticed yesterday that I broke the mapping_writably_mapped test in 2.6.7! Bad bad bug, good good find. The i_mmap_writable count must be incremented for VM_SHARED (just as i_writecount is for VM_DENYWRITE, but while holding the i_mmap_lock) when dup_mmap() copies the vma for fork: it has its own more optimal version of __vma_link_file(), and I missed this out. So the count was later going down to 0 (dangerous) when one end unmapped, then wrapping negative (inefficient) when the other end unmapped. The only impact on x86 would have been that setting a mandatory lock on a file which has at some time been opened O_RDWR and mapped MAP_SHARED (but not necessarily PROT_WRITE) across a fork, might fail with -EAGAIN when it should succeed, or succeed when it should fail. But those architectures which rely on flush_dcache_page() to flush userspace modifications back into the page before the kernel reads it, may in some cases have skipped the flush after such a fork - though any repetitive test will soon wrap the count negative, in which case it will flush_dcache_page() unnecessarily. Fix would be a two-liner, but mapping variable added, and comment moved. Reported-by: Lee Schermerhorn Signed-off-by: Hugh Dickins Signed-off-by: Linus Torvalds --- kernel/fork.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index 8d6a7dd9282b..495da2e9a8b4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -315,17 +315,20 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) file = tmp->vm_file; if (file) { struct inode *inode = file->f_path.dentry->d_inode; + struct address_space *mapping = file->f_mapping; + get_file(file); if (tmp->vm_flags & VM_DENYWRITE) atomic_dec(&inode->i_writecount); - - /* insert tmp into the share list, just after mpnt */ - spin_lock(&file->f_mapping->i_mmap_lock); + spin_lock(&mapping->i_mmap_lock); + if (tmp->vm_flags & VM_SHARED) + mapping->i_mmap_writable++; tmp->vm_truncate_count = mpnt->vm_truncate_count; - flush_dcache_mmap_lock(file->f_mapping); + flush_dcache_mmap_lock(mapping); + /* insert tmp into the share list, just after mpnt */ vma_prio_tree_add(tmp, mpnt); - flush_dcache_mmap_unlock(file->f_mapping); - spin_unlock(&file->f_mapping->i_mmap_lock); + flush_dcache_mmap_unlock(mapping); + spin_unlock(&mapping->i_mmap_lock); } /* -- cgit v1.2.3