author     Ingo Molnar <mingo@elte.hu>   2010-02-26 09:18:32 +0100
committer  Ingo Molnar <mingo@elte.hu>   2010-02-26 09:18:32 +0100
commit     64b9fb5704a479d98a59f2a1d45d3331a8f847f8 (patch)
tree       2b1052b05fa7615c817894bc9802bc5bb2af7ac1 /kernel
parent     83f0d53993b2967e54186468b0fc4321447f68f1 (diff)
parent     60b341b778cc2929df16c0a504c91621b3c6a4ad (diff)
download   linux-64b9fb5704a479d98a59f2a1d45d3331a8f847f8.tar.bz2

Merge commit 'v2.6.33' into tracing/core

Conflicts:
	scripts/recordmcount.pl

Merge reason: Merge up to v2.6.33.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cgroup.c                       8
-rw-r--r--  kernel/cpu.c                         10
-rw-r--r--  kernel/cred.c                         2
-rw-r--r--  kernel/fork.c                        15
-rw-r--r--  kernel/futex.c                       57
-rw-r--r--  kernel/hw_breakpoint.c               58
-rw-r--r--  kernel/kexec.c                        4
-rw-r--r--  kernel/kfifo.c                      111
-rw-r--r--  kernel/kgdb.c                         9
-rw-r--r--  kernel/kmod.c                        12
-rw-r--r--  kernel/lockdep.c                      2
-rw-r--r--  kernel/module.c                      17
-rw-r--r--  kernel/panic.c                        3
-rw-r--r--  kernel/perf_event.c                  16
-rw-r--r--  kernel/printk.c                       1
-rw-r--r--  kernel/sched.c                       44
-rw-r--r--  kernel/sched_fair.c                   2
-rw-r--r--  kernel/signal.c                       3
-rw-r--r--  kernel/smp.c                          2
-rw-r--r--  kernel/softirq.c                     15
-rw-r--r--  kernel/softlockup.c                  15
-rw-r--r--  kernel/sys.c                          2
-rw-r--r--  kernel/time/clockevents.c             3
-rw-r--r--  kernel/time/clocksource.c            18
-rw-r--r--  kernel/time/timekeeping.c             2
-rw-r--r--  kernel/timer.c                        3
-rw-r--r--  kernel/trace/Kconfig                  4
-rw-r--r--  kernel/trace/ftrace.c                 6
-rw-r--r--  kernel/trace/ring_buffer.c           28
-rw-r--r--  kernel/trace/trace.c                  5
-rw-r--r--  kernel/trace/trace_events_filter.c   29
-rw-r--r--  kernel/trace/trace_kprobe.c           2
-rw-r--r--  kernel/trace/trace_stack.c           24
33 files changed, 372 insertions, 160 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0249f4be9b5c..aa3bee566446 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2468,7 +2468,6 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
/* make sure l doesn't vanish out from under us */
down_write(&l->mutex);
mutex_unlock(&cgrp->pidlist_mutex);
- l->use_count++;
return l;
}
}
@@ -2937,14 +2936,17 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
for_each_subsys(root, ss) {
struct cgroup_subsys_state *css = ss->create(ss, cgrp);
+
if (IS_ERR(css)) {
err = PTR_ERR(css);
goto err_destroy;
}
init_cgroup_css(css, ss, cgrp);
- if (ss->use_id)
- if (alloc_css_id(ss, parent, cgrp))
+ if (ss->use_id) {
+ err = alloc_css_id(ss, parent, cgrp);
+ if (err)
goto err_destroy;
+ }
/* At error, ->destroy() callback has to free assigned ID. */
}
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 1c8ddd6ee940..677f25376a38 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -151,13 +151,13 @@ static inline void check_for_tasks(int cpu)
write_lock_irq(&tasklist_lock);
for_each_process(p) {
- if (task_cpu(p) == cpu &&
+ if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
(!cputime_eq(p->utime, cputime_zero) ||
!cputime_eq(p->stime, cputime_zero)))
- printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\
- (state = %ld, flags = %x) \n",
- p->comm, task_pid_nr(p), cpu,
- p->state, p->flags);
+ printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d "
+ "(state = %ld, flags = %x)\n",
+ p->comm, task_pid_nr(p), cpu,
+ p->state, p->flags);
}
write_unlock_irq(&tasklist_lock);
}
diff --git a/kernel/cred.c b/kernel/cred.c
index dd76cfe5f5b0..1ed8ca18790c 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -224,7 +224,7 @@ struct cred *cred_alloc_blank(void)
#ifdef CONFIG_KEYS
new->tgcred = kzalloc(sizeof(*new->tgcred), GFP_KERNEL);
if (!new->tgcred) {
- kfree(new);
+ kmem_cache_free(cred_jar, new);
return NULL;
}
atomic_set(&new->tgcred->usage, 1);
diff --git a/kernel/fork.c b/kernel/fork.c
index 5b2959b3ffc2..f88bd984df35 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1241,21 +1241,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
/* Need tasklist lock for parent etc handling! */
write_lock_irq(&tasklist_lock);
- /*
- * The task hasn't been attached yet, so its cpus_allowed mask will
- * not be changed, nor will its assigned CPU.
- *
- * The cpus_allowed mask of the parent may have changed after it was
- * copied first time - so re-copy it here, then check the child's CPU
- * to ensure it is on a valid CPU (and if not, just force it back to
- * parent's CPU). This avoids alot of nasty races.
- */
- p->cpus_allowed = current->cpus_allowed;
- p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed;
- if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
- !cpu_online(task_cpu(p))))
- set_task_cpu(p, smp_processor_id());
-
/* CLONE_PARENT re-uses the old parent */
if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
p->real_parent = current->real_parent;
diff --git a/kernel/futex.c b/kernel/futex.c
index 8e3c3ffe1b9a..e7a35f1039e7 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -203,8 +203,6 @@ static void drop_futex_key_refs(union futex_key *key)
* @uaddr: virtual address of the futex
* @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
* @key: address where result is stored.
- * @rw: mapping needs to be read/write (values: VERIFY_READ,
- * VERIFY_WRITE)
*
* Returns a negative error code or 0
* The key words are stored in *key on success.
@@ -216,7 +214,7 @@ static void drop_futex_key_refs(union futex_key *key)
* lock_page() might sleep, the caller should not hold a spinlock.
*/
static int
-get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
+get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
{
unsigned long address = (unsigned long)uaddr;
struct mm_struct *mm = current->mm;
@@ -239,7 +237,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
* but access_ok() should be faster than find_vma()
*/
if (!fshared) {
- if (unlikely(!access_ok(rw, uaddr, sizeof(u32))))
+ if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
return -EFAULT;
key->private.mm = mm;
key->private.address = address;
@@ -248,7 +246,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
}
again:
- err = get_user_pages_fast(address, 1, rw == VERIFY_WRITE, &page);
+ err = get_user_pages_fast(address, 1, 1, &page);
if (err < 0)
return err;
@@ -532,8 +530,25 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
return -EINVAL;
WARN_ON(!atomic_read(&pi_state->refcount));
- WARN_ON(pid && pi_state->owner &&
- pi_state->owner->pid != pid);
+
+ /*
+ * When pi_state->owner is NULL then the owner died
+ * and another waiter is on the fly. pi_state->owner
+ * is fixed up by the task which acquires
+ * pi_state->rt_mutex.
+ *
+ * We do not check for pid == 0 which can happen when
+ * the owner died and robust_list_exit() cleared the
+ * TID.
+ */
+ if (pid && pi_state->owner) {
+ /*
+ * Bail out if user space manipulated the
+ * futex value.
+ */
+ if (pid != task_pid_vnr(pi_state->owner))
+ return -EINVAL;
+ }
atomic_inc(&pi_state->refcount);
*ps = pi_state;
@@ -760,6 +775,13 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
if (!pi_state)
return -EINVAL;
+ /*
+ * If current does not own the pi_state then the futex is
+ * inconsistent and user space fiddled with the futex value.
+ */
+ if (pi_state->owner != current)
+ return -EINVAL;
+
raw_spin_lock(&pi_state->pi_mutex.wait_lock);
new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
@@ -867,7 +889,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
if (!bitset)
return -EINVAL;
- ret = get_futex_key(uaddr, fshared, &key, VERIFY_READ);
+ ret = get_futex_key(uaddr, fshared, &key);
if (unlikely(ret != 0))
goto out;
@@ -913,10 +935,10 @@ futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
int ret, op_ret;
retry:
- ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ);
+ ret = get_futex_key(uaddr1, fshared, &key1);
if (unlikely(ret != 0))
goto out;
- ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE);
+ ret = get_futex_key(uaddr2, fshared, &key2);
if (unlikely(ret != 0))
goto out_put_key1;
@@ -1175,11 +1197,10 @@ retry:
pi_state = NULL;
}
- ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ);
+ ret = get_futex_key(uaddr1, fshared, &key1);
if (unlikely(ret != 0))
goto out;
- ret = get_futex_key(uaddr2, fshared, &key2,
- requeue_pi ? VERIFY_WRITE : VERIFY_READ);
+ ret = get_futex_key(uaddr2, fshared, &key2);
if (unlikely(ret != 0))
goto out_put_key1;
@@ -1738,7 +1759,7 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared,
*/
retry:
q->key = FUTEX_KEY_INIT;
- ret = get_futex_key(uaddr, fshared, &q->key, VERIFY_READ);
+ ret = get_futex_key(uaddr, fshared, &q->key);
if (unlikely(ret != 0))
return ret;
@@ -1904,7 +1925,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
q.requeue_pi_key = NULL;
retry:
q.key = FUTEX_KEY_INIT;
- ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_WRITE);
+ ret = get_futex_key(uaddr, fshared, &q.key);
if (unlikely(ret != 0))
goto out;
@@ -1974,7 +1995,7 @@ retry_private:
/* Unqueue and drop the lock */
unqueue_me_pi(&q);
- goto out;
+ goto out_put_key;
out_unlock_put_key:
queue_unlock(&q, hb);
@@ -2023,7 +2044,7 @@ retry:
if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
return -EPERM;
- ret = get_futex_key(uaddr, fshared, &key, VERIFY_WRITE);
+ ret = get_futex_key(uaddr, fshared, &key);
if (unlikely(ret != 0))
goto out;
@@ -2215,7 +2236,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
rt_waiter.task = NULL;
key2 = FUTEX_KEY_INIT;
- ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE);
+ ret = get_futex_key(uaddr2, fshared, &key2);
if (unlikely(ret != 0))
goto out;
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 50dbd5999588..967e66143e11 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -243,38 +243,70 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
* ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
* + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM
*/
-int reserve_bp_slot(struct perf_event *bp)
+static int __reserve_bp_slot(struct perf_event *bp)
{
struct bp_busy_slots slots = {0};
- int ret = 0;
-
- mutex_lock(&nr_bp_mutex);
fetch_bp_busy_slots(&slots, bp);
/* Flexible counters need to keep at least one slot */
- if (slots.pinned + (!!slots.flexible) == HBP_NUM) {
- ret = -ENOSPC;
- goto end;
- }
+ if (slots.pinned + (!!slots.flexible) == HBP_NUM)
+ return -ENOSPC;
toggle_bp_slot(bp, true);
-end:
+ return 0;
+}
+
+int reserve_bp_slot(struct perf_event *bp)
+{
+ int ret;
+
+ mutex_lock(&nr_bp_mutex);
+
+ ret = __reserve_bp_slot(bp);
+
mutex_unlock(&nr_bp_mutex);
return ret;
}
+static void __release_bp_slot(struct perf_event *bp)
+{
+ toggle_bp_slot(bp, false);
+}
+
void release_bp_slot(struct perf_event *bp)
{
mutex_lock(&nr_bp_mutex);
- toggle_bp_slot(bp, false);
+ __release_bp_slot(bp);
mutex_unlock(&nr_bp_mutex);
}
+/*
+ * Allow the kernel debugger to reserve breakpoint slots without
+ * taking a lock using the dbg_* variant of for the reserve and
+ * release breakpoint slots.
+ */
+int dbg_reserve_bp_slot(struct perf_event *bp)
+{
+ if (mutex_is_locked(&nr_bp_mutex))
+ return -1;
+
+ return __reserve_bp_slot(bp);
+}
+
+int dbg_release_bp_slot(struct perf_event *bp)
+{
+ if (mutex_is_locked(&nr_bp_mutex))
+ return -1;
+
+ __release_bp_slot(bp);
+
+ return 0;
+}
int register_perf_hw_breakpoint(struct perf_event *bp)
{
@@ -296,6 +328,10 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
if (!bp->attr.disabled || !bp->overflow_handler)
ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
+ /* if arch_validate_hwbkpt_settings() fails then release bp slot */
+ if (ret)
+ release_bp_slot(bp);
+
return ret;
}
@@ -324,8 +360,8 @@ EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
{
u64 old_addr = bp->attr.bp_addr;
+ u64 old_len = bp->attr.bp_len;
int old_type = bp->attr.bp_type;
- int old_len = bp->attr.bp_len;
int err = 0;
perf_event_disable(bp);
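
A minimal sketch of how a kernel-debugger path might use the lock-free dbg_reserve_bp_slot()/dbg_release_bp_slot() helpers added above. Illustrative only, not part of the patch: debugger_arm_breakpoint()/debugger_disarm_breakpoint() are hypothetical callers, and the dbg_* declarations are assumed to be visible via <linux/hw_breakpoint.h>.

#include <linux/errno.h>
#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>

/* Hypothetical debugger path: runs with the machine stopped, where the
 * mutex-taking reserve_bp_slot() could deadlock against an interrupted
 * task that already holds nr_bp_mutex. */
static int debugger_arm_breakpoint(struct perf_event *bp)
{
	if (dbg_reserve_bp_slot(bp) < 0)
		return -EBUSY;	/* nr_bp_mutex is held; back off instead of deadlocking */

	/* ... program the architectural debug registers for bp ... */
	return 0;
}

static void debugger_disarm_breakpoint(struct perf_event *bp)
{
	/* ... clear the debug registers ... */
	dbg_release_bp_slot(bp);	/* likewise returns -1 if the mutex is held */
}
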
diff --git a/kernel/kexec.c b/kernel/kexec.c
index a9a93d9ee7a7..ef077fb73155 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -32,6 +32,7 @@
#include <linux/console.h>
#include <linux/vmalloc.h>
#include <linux/swap.h>
+#include <linux/kmsg_dump.h>
#include <asm/page.h>
#include <asm/uaccess.h>
@@ -1074,6 +1075,9 @@ void crash_kexec(struct pt_regs *regs)
if (mutex_trylock(&kexec_mutex)) {
if (kexec_crash_image) {
struct pt_regs fixed_regs;
+
+ kmsg_dump(KMSG_DUMP_KEXEC);
+
crash_setup_regs(&fixed_regs, regs);
crash_save_vmcoreinfo();
machine_crash_shutdown(&fixed_regs);
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index e92d519f93b1..35edbe22e9a9 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -28,7 +28,7 @@
#include <linux/log2.h>
#include <linux/uaccess.h>
-static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer,
+static void _kfifo_init(struct kfifo *fifo, void *buffer,
unsigned int size)
{
fifo->buffer = buffer;
@@ -41,10 +41,10 @@ static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer,
* kfifo_init - initialize a FIFO using a preallocated buffer
* @fifo: the fifo to assign the buffer
* @buffer: the preallocated buffer to be used.
- * @size: the size of the internal buffer, this have to be a power of 2.
+ * @size: the size of the internal buffer, this has to be a power of 2.
*
*/
-void kfifo_init(struct kfifo *fifo, unsigned char *buffer, unsigned int size)
+void kfifo_init(struct kfifo *fifo, void *buffer, unsigned int size)
{
/* size must be a power of 2 */
BUG_ON(!is_power_of_2(size));
@@ -80,7 +80,7 @@ int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask)
buffer = kmalloc(size, gfp_mask);
if (!buffer) {
- _kfifo_init(fifo, 0, 0);
+ _kfifo_init(fifo, NULL, 0);
return -ENOMEM;
}
@@ -97,6 +97,7 @@ EXPORT_SYMBOL(kfifo_alloc);
void kfifo_free(struct kfifo *fifo)
{
kfree(fifo->buffer);
+ _kfifo_init(fifo, NULL, 0);
}
EXPORT_SYMBOL(kfifo_free);
@@ -159,8 +160,9 @@ static inline void __kfifo_out_data(struct kfifo *fifo,
memcpy(to + l, fifo->buffer, len - l);
}
-static inline unsigned int __kfifo_from_user_data(struct kfifo *fifo,
- const void __user *from, unsigned int len, unsigned int off)
+static inline int __kfifo_from_user_data(struct kfifo *fifo,
+ const void __user *from, unsigned int len, unsigned int off,
+ unsigned *lenout)
{
unsigned int l;
int ret;
@@ -177,16 +179,20 @@ static inline unsigned int __kfifo_from_user_data(struct kfifo *fifo,
/* first put the data starting from fifo->in to buffer end */
l = min(len, fifo->size - off);
ret = copy_from_user(fifo->buffer + off, from, l);
-
- if (unlikely(ret))
- return ret + len - l;
+ if (unlikely(ret)) {
+ *lenout = ret;
+ return -EFAULT;
+ }
+ *lenout = l;
/* then put the rest (if any) at the beginning of the buffer */
- return copy_from_user(fifo->buffer, from + l, len - l);
+ ret = copy_from_user(fifo->buffer, from + l, len - l);
+ *lenout += ret ? ret : len - l;
+ return ret ? -EFAULT : 0;
}
-static inline unsigned int __kfifo_to_user_data(struct kfifo *fifo,
- void __user *to, unsigned int len, unsigned int off)
+static inline int __kfifo_to_user_data(struct kfifo *fifo,
+ void __user *to, unsigned int len, unsigned int off, unsigned *lenout)
{
unsigned int l;
int ret;
@@ -203,12 +209,21 @@ static inline unsigned int __kfifo_to_user_data(struct kfifo *fifo,
/* first get the data from fifo->out until the end of the buffer */
l = min(len, fifo->size - off);
ret = copy_to_user(to, fifo->buffer + off, l);
-
- if (unlikely(ret))
- return ret + len - l;
+ *lenout = l;
+ if (unlikely(ret)) {
+ *lenout -= ret;
+ return -EFAULT;
+ }
/* then get the rest (if any) from the beginning of the buffer */
- return copy_to_user(to + l, fifo->buffer, len - l);
+ len -= l;
+ ret = copy_to_user(to + l, fifo->buffer, len);
+ if (unlikely(ret)) {
+ *lenout += len - ret;
+ return -EFAULT;
+ }
+ *lenout += len;
+ return 0;
}
unsigned int __kfifo_in_n(struct kfifo *fifo,
@@ -235,7 +250,7 @@ EXPORT_SYMBOL(__kfifo_in_n);
* Note that with only one concurrent reader and one concurrent
* writer, you don't need extra locking to use these functions.
*/
-unsigned int kfifo_in(struct kfifo *fifo, const unsigned char *from,
+unsigned int kfifo_in(struct kfifo *fifo, const void *from,
unsigned int len)
{
len = min(kfifo_avail(fifo), len);
@@ -277,7 +292,7 @@ EXPORT_SYMBOL(__kfifo_out_n);
* Note that with only one concurrent reader and one concurrent
* writer, you don't need extra locking to use these functions.
*/
-unsigned int kfifo_out(struct kfifo *fifo, unsigned char *to, unsigned int len)
+unsigned int kfifo_out(struct kfifo *fifo, void *to, unsigned int len)
{
len = min(kfifo_len(fifo), len);
@@ -288,6 +303,27 @@ unsigned int kfifo_out(struct kfifo *fifo, unsigned char *to, unsigned int len)
}
EXPORT_SYMBOL(kfifo_out);
+/**
+ * kfifo_out_peek - copy some data from the FIFO, but do not remove it
+ * @fifo: the fifo to be used.
+ * @to: where the data must be copied.
+ * @len: the size of the destination buffer.
+ * @offset: offset into the fifo
+ *
+ * This function copies at most @len bytes at @offset from the FIFO
+ * into the @to buffer and returns the number of copied bytes.
+ * The data is not removed from the FIFO.
+ */
+unsigned int kfifo_out_peek(struct kfifo *fifo, void *to, unsigned int len,
+ unsigned offset)
+{
+ len = min(kfifo_len(fifo), len + offset);
+
+ __kfifo_out_data(fifo, to, len, offset);
+ return len;
+}
+EXPORT_SYMBOL(kfifo_out_peek);
+
unsigned int __kfifo_out_generic(struct kfifo *fifo,
void *to, unsigned int len, unsigned int recsize,
unsigned int *total)
@@ -299,10 +335,13 @@ EXPORT_SYMBOL(__kfifo_out_generic);
unsigned int __kfifo_from_user_n(struct kfifo *fifo,
const void __user *from, unsigned int len, unsigned int recsize)
{
+ unsigned total;
+
if (kfifo_avail(fifo) < len + recsize)
return len + 1;
- return __kfifo_from_user_data(fifo, from, len, recsize);
+ __kfifo_from_user_data(fifo, from, len, recsize, &total);
+ return total;
}
EXPORT_SYMBOL(__kfifo_from_user_n);
@@ -311,20 +350,24 @@ EXPORT_SYMBOL(__kfifo_from_user_n);
* @fifo: the fifo to be used.
* @from: pointer to the data to be added.
* @len: the length of the data to be added.
+ * @total: the actual returned data length.
*
* This function copies at most @len bytes from the @from into the
- * FIFO depending and returns the number of copied bytes.
+ * FIFO depending and returns -EFAULT/0.
*
* Note that with only one concurrent reader and one concurrent
* writer, you don't need extra locking to use these functions.
*/
-unsigned int kfifo_from_user(struct kfifo *fifo,
- const void __user *from, unsigned int len)
+int kfifo_from_user(struct kfifo *fifo,
+ const void __user *from, unsigned int len, unsigned *total)
{
+ int ret;
len = min(kfifo_avail(fifo), len);
- len -= __kfifo_from_user_data(fifo, from, len, 0);
+ ret = __kfifo_from_user_data(fifo, from, len, 0, total);
+ if (ret)
+ return ret;
__kfifo_add_in(fifo, len);
- return len;
+ return 0;
}
EXPORT_SYMBOL(kfifo_from_user);
@@ -339,17 +382,17 @@ unsigned int __kfifo_to_user_n(struct kfifo *fifo,
void __user *to, unsigned int len, unsigned int reclen,
unsigned int recsize)
{
- unsigned int ret;
+ unsigned int ret, total;
if (kfifo_len(fifo) < reclen + recsize)
return len;
- ret = __kfifo_to_user_data(fifo, to, reclen, recsize);
+ ret = __kfifo_to_user_data(fifo, to, reclen, recsize, &total);
if (likely(ret == 0))
__kfifo_add_out(fifo, reclen + recsize);
- return ret;
+ return total;
}
EXPORT_SYMBOL(__kfifo_to_user_n);
@@ -358,20 +401,22 @@ EXPORT_SYMBOL(__kfifo_to_user_n);
* @fifo: the fifo to be used.
* @to: where the data must be copied.
* @len: the size of the destination buffer.
+ * @lenout: pointer to output variable with copied data
*
* This function copies at most @len bytes from the FIFO into the
- * @to buffer and returns the number of copied bytes.
+ * @to buffer and 0 or -EFAULT.
*
* Note that with only one concurrent reader and one concurrent
* writer, you don't need extra locking to use these functions.
*/
-unsigned int kfifo_to_user(struct kfifo *fifo,
- void __user *to, unsigned int len)
+int kfifo_to_user(struct kfifo *fifo,
+ void __user *to, unsigned int len, unsigned *lenout)
{
+ int ret;
len = min(kfifo_len(fifo), len);
- len -= __kfifo_to_user_data(fifo, to, len, 0);
- __kfifo_add_out(fifo, len);
- return len;
+ ret = __kfifo_to_user_data(fifo, to, len, 0, lenout);
+ __kfifo_add_out(fifo, *lenout);
+ return ret;
}
EXPORT_SYMBOL(kfifo_to_user);
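
A caller-side sketch of the reworked kfifo_from_user(): it now reports success or -EFAULT through its return value and the number of queued bytes through an out parameter, instead of returning the copied length directly. Illustrative only, not part of the patch; sample_write() and dev_fifo are hypothetical, and dev_fifo is assumed to have been set up with kfifo_alloc().

#include <linux/fs.h>
#include <linux/kfifo.h>

static struct kfifo dev_fifo;	/* assumed initialized elsewhere via kfifo_alloc() */

static ssize_t sample_write(struct file *file, const char __user *buf,
			    size_t count, loff_t *ppos)
{
	unsigned int copied;
	int ret;

	/* Queue at most 'count' bytes from user space into the FIFO. */
	ret = kfifo_from_user(&dev_fifo, buf, count, &copied);
	if (ret)		/* -EFAULT: fault while copying from user space */
		return ret;

	return copied;		/* bytes actually stored in the FIFO */
}
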
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 2eb517e23514..761fdd2b3034 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -583,6 +583,9 @@ static void kgdb_wait(struct pt_regs *regs)
smp_wmb();
atomic_set(&cpu_in_kgdb[cpu], 1);
+ /* Disable any cpu specific hw breakpoints */
+ kgdb_disable_hw_debug(regs);
+
/* Wait till primary CPU is done with debugging */
while (atomic_read(&passive_cpu_wait[cpu]))
cpu_relax();
@@ -596,7 +599,7 @@ static void kgdb_wait(struct pt_regs *regs)
/* Signal the primary CPU that we are done: */
atomic_set(&cpu_in_kgdb[cpu], 0);
- touch_softlockup_watchdog();
+ touch_softlockup_watchdog_sync();
clocksource_touch_watchdog();
local_irq_restore(flags);
}
@@ -1450,7 +1453,7 @@ acquirelock:
(kgdb_info[cpu].task &&
kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) {
atomic_set(&kgdb_active, -1);
- touch_softlockup_watchdog();
+ touch_softlockup_watchdog_sync();
clocksource_touch_watchdog();
local_irq_restore(flags);
@@ -1550,7 +1553,7 @@ kgdb_restore:
}
/* Free kgdb_active */
atomic_set(&kgdb_active, -1);
- touch_softlockup_watchdog();
+ touch_softlockup_watchdog_sync();
clocksource_touch_watchdog();
local_irq_restore(flags);
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 25b103190364..bf0e231d9702 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -520,13 +520,15 @@ int call_usermodehelper_pipe(char *path, char **argv, char **envp,
return -ENOMEM;
ret = call_usermodehelper_stdinpipe(sub_info, filp);
- if (ret < 0)
- goto out;
+ if (ret < 0) {
+ call_usermodehelper_freeinfo(sub_info);
+ return ret;
+ }
- return call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC);
+ ret = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC);
+ if (ret < 0) /* Failed to execute helper, close pipe */
+ filp_close(*filp, NULL);
- out:
- call_usermodehelper_freeinfo(sub_info);
return ret;
}
EXPORT_SYMBOL(call_usermodehelper_pipe);
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 5feaddcdbe49..c62ec14609b9 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2147,7 +2147,7 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this,
return ret;
return print_irq_inversion_bug(curr, &root, target_entry,
- this, 1, irqclass);
+ this, 0, irqclass);
}
void print_irqtrace_events(struct task_struct *curr)
diff --git a/kernel/module.c b/kernel/module.c
index e96b8ed1cb6a..f82386bd9ee9 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1010,6 +1010,12 @@ static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs,
* J. Corbet <corbet@lwn.net>
*/
#if defined(CONFIG_KALLSYMS) && defined(CONFIG_SYSFS)
+
+static inline bool sect_empty(const Elf_Shdr *sect)
+{
+ return !(sect->sh_flags & SHF_ALLOC) || sect->sh_size == 0;
+}
+
struct module_sect_attr
{
struct module_attribute mattr;
@@ -1051,8 +1057,7 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect,
/* Count loaded sections and allocate structures */
for (i = 0; i < nsect; i++)
- if (sechdrs[i].sh_flags & SHF_ALLOC
- && sechdrs[i].sh_size)
+ if (!sect_empty(&sechdrs[i]))
nloaded++;
size[0] = ALIGN(sizeof(*sect_attrs)
+ nloaded * sizeof(sect_attrs->attrs[0]),
@@ -1070,9 +1075,7 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect,
sattr = &sect_attrs->attrs[0];
gattr = &sect_attrs->grp.attrs[0];
for (i = 0; i < nsect; i++) {
- if (! (sechdrs[i].sh_flags & SHF_ALLOC))
- continue;
- if (!sechdrs[i].sh_size)
+ if (sect_empty(&sechdrs[i]))
continue;
sattr->address = sechdrs[i].sh_addr;
sattr->name = kstrdup(secstrings + sechdrs[i].sh_name,
@@ -1156,7 +1159,7 @@ static void add_notes_attrs(struct module *mod, unsigned int nsect,
/* Count notes sections and allocate structures. */
notes = 0;
for (i = 0; i < nsect; i++)
- if ((sechdrs[i].sh_flags & SHF_ALLOC) &&
+ if (!sect_empty(&sechdrs[i]) &&
(sechdrs[i].sh_type == SHT_NOTE))
++notes;
@@ -1172,7 +1175,7 @@ static void add_notes_attrs(struct module *mod, unsigned int nsect,
notes_attrs->notes = notes;
nattr = &notes_attrs->attrs[0];
for (loaded = i = 0; i < nsect; ++i) {
- if (!(sechdrs[i].sh_flags & SHF_ALLOC))
+ if (sect_empty(&sechdrs[i]))
continue;
if (sechdrs[i].sh_type == SHT_NOTE) {
nattr->attr.name = mod->sect_attrs->attrs[loaded].name;
diff --git a/kernel/panic.c b/kernel/panic.c
index 5827f7b97254..c787333282b8 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -75,7 +75,6 @@ NORET_TYPE void panic(const char * fmt, ...)
dump_stack();
#endif
- kmsg_dump(KMSG_DUMP_PANIC);
/*
* If we have crashed and we have a crash kernel loaded let it handle
* everything else.
@@ -83,6 +82,8 @@ NORET_TYPE void panic(const char * fmt, ...)
*/
crash_kexec(NULL);
+ kmsg_dump(KMSG_DUMP_PANIC);
+
/*
* Note smp_send_stop is the usual smp shutdown function, which
* unfortunately means it may not be hardened to work in a panic
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 603c0d8b5df1..2ae7409bf38f 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -3259,8 +3259,6 @@ static void perf_event_task_output(struct perf_event *event,
task_event->event_id.tid = perf_event_tid(event, task);
task_event->event_id.ptid = perf_event_tid(event, current);
- task_event->event_id.time = perf_clock();
-
perf_output_put(&handle, task_event->event_id);
perf_output_end(&handle);
@@ -3268,6 +3266,9 @@ static void perf_event_task_output(struct perf_event *event,
static int perf_event_task_match(struct perf_event *event)
{
+ if (event->state < PERF_EVENT_STATE_INACTIVE)
+ return 0;
+
if (event->cpu != -1 && event->cpu != smp_processor_id())
return 0;
@@ -3297,7 +3298,7 @@ static void perf_event_task_event(struct perf_task_event *task_event)
cpuctx = &get_cpu_var(perf_cpu_context);
perf_event_task_ctx(&cpuctx->ctx, task_event);
if (!ctx)
- ctx = rcu_dereference(task_event->task->perf_event_ctxp);
+ ctx = rcu_dereference(current->perf_event_ctxp);
if (ctx)
perf_event_task_ctx(ctx, task_event);
put_cpu_var(perf_cpu_context);
@@ -3328,6 +3329,7 @@ static void perf_event_task(struct task_struct *task,
/* .ppid */
/* .tid */
/* .ptid */
+ .time = perf_clock(),
},
};
@@ -3377,6 +3379,9 @@ static void perf_event_comm_output(struct perf_event *event,
static int perf_event_comm_match(struct perf_event *event)
{
+ if (event->state < PERF_EVENT_STATE_INACTIVE)
+ return 0;
+
if (event->cpu != -1 && event->cpu != smp_processor_id())
return 0;
@@ -3494,6 +3499,9 @@ static void perf_event_mmap_output(struct perf_event *event,
static int perf_event_mmap_match(struct perf_event *event,
struct perf_mmap_event *mmap_event)
{
+ if (event->state < PERF_EVENT_STATE_INACTIVE)
+ return 0;
+
if (event->cpu != -1 && event->cpu != smp_processor_id())
return 0;
@@ -4571,7 +4579,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
if (attr->type >= PERF_TYPE_MAX)
return -EINVAL;
- if (attr->__reserved_1 || attr->__reserved_2)
+ if (attr->__reserved_1)
return -EINVAL;
if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
diff --git a/kernel/printk.c b/kernel/printk.c
index 17463ca2e229..1751c456b71f 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1467,6 +1467,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_unregister);
static const char const *kmsg_reasons[] = {
[KMSG_DUMP_OOPS] = "oops",
[KMSG_DUMP_PANIC] = "panic",
+ [KMSG_DUMP_KEXEC] = "kexec",
};
static const char *kmsg_to_str(enum kmsg_dump_reason reason)
diff --git a/kernel/sched.c b/kernel/sched.c
index c535cc4f6428..3a8fb30a91b1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2320,14 +2320,12 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
}
/*
- * Called from:
+ * Gets called from 3 sites (exec, fork, wakeup), since it is called without
+ * holding rq->lock we need to ensure ->cpus_allowed is stable, this is done
+ * by:
*
- * - fork, @p is stable because it isn't on the tasklist yet
- *
- * - exec, @p is unstable, retry loop
- *
- * - wake-up, we serialize ->cpus_allowed against TASK_WAKING so
- * we should be good.
+ * exec: is unstable, retry loop
+ * fork & wake-up: serialize ->cpus_allowed against TASK_WAKING
*/
static inline
int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
@@ -2620,9 +2618,6 @@ void sched_fork(struct task_struct *p, int clone_flags)
if (p->sched_class->task_fork)
p->sched_class->task_fork(p);
-#ifdef CONFIG_SMP
- cpu = select_task_rq(p, SD_BALANCE_FORK, 0);
-#endif
set_task_cpu(p, cpu);
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
@@ -2652,6 +2647,21 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
{
unsigned long flags;
struct rq *rq;
+ int cpu = get_cpu();
+
+#ifdef CONFIG_SMP
+ /*
+ * Fork balancing, do it here and not earlier because:
+ * - cpus_allowed can change in the fork path
+ * - any previously selected cpu might disappear through hotplug
+ *
+ * We still have TASK_WAKING but PF_STARTING is gone now, meaning
+ * ->cpus_allowed is stable, we have preemption disabled, meaning
+ * cpu_online_mask is stable.
+ */
+ cpu = select_task_rq(p, SD_BALANCE_FORK, 0);
+ set_task_cpu(p, cpu);
+#endif
rq = task_rq_lock(p, &flags);
BUG_ON(p->state != TASK_WAKING);
@@ -2665,6 +2675,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
p->sched_class->task_woken(rq, p);
#endif
task_rq_unlock(rq, &flags);
+ put_cpu();
}
#ifdef CONFIG_PREEMPT_NOTIFIERS
@@ -5530,8 +5541,11 @@ need_resched_nonpreemptible:
post_schedule(rq);
- if (unlikely(reacquire_kernel_lock(current) < 0))
+ if (unlikely(reacquire_kernel_lock(current) < 0)) {
+ prev = rq->curr;
+ switch_count = &prev->nivcsw;
goto need_resched_nonpreemptible;
+ }
preempt_enable_no_resched();
if (need_resched())
@@ -7136,14 +7150,18 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
* the ->cpus_allowed mask from under waking tasks, which would be
* possible when we change rq->lock in ttwu(), so synchronize against
* TASK_WAKING to avoid that.
+ *
+ * Make an exception for freshly cloned tasks, since cpuset namespaces
+ * might move the task about, we have to validate the target in
+ * wake_up_new_task() anyway since the cpu might have gone away.
*/
again:
- while (p->state == TASK_WAKING)
+ while (p->state == TASK_WAKING && !(p->flags & PF_STARTING))
cpu_relax();
rq = task_rq_lock(p, &flags);
- if (p->state == TASK_WAKING) {
+ if (p->state == TASK_WAKING && !(p->flags & PF_STARTING)) {
task_rq_unlock(rq, &flags);
goto again;
}
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 42ac3c9f66f6..8fe7ee81c552 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1508,7 +1508,7 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
* If there's an idle sibling in this domain, make that
* the wake_affine target instead of the current cpu.
*/
- if (tmp->flags & SD_PREFER_SIBLING)
+ if (tmp->flags & SD_SHARE_PKG_RESOURCES)
target = select_idle_sibling(p, tmp, target);
if (target >= 0) {
diff --git a/kernel/signal.c b/kernel/signal.c
index d09692b40376..934ae5e687b9 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -979,7 +979,8 @@ static void print_fatal_signal(struct pt_regs *regs, int signr)
for (i = 0; i < 16; i++) {
unsigned char insn;
- __get_user(insn, (unsigned char *)(regs->ip + i));
+ if (get_user(insn, (unsigned char *)(regs->ip + i)))
+ break;
printk("%02x ", insn);
}
}
diff --git a/kernel/smp.c b/kernel/smp.c
index de735a6637d0..f10408422444 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -347,7 +347,7 @@ int smp_call_function_any(const struct cpumask *mask,
goto call;
/* Try for same node. */
- nodemask = cpumask_of_node(cpu);
+ nodemask = cpumask_of_node(cpu_to_node(cpu));
for (cpu = cpumask_first_and(nodemask, mask); cpu < nr_cpu_ids;
cpu = cpumask_next_and(cpu, nodemask, mask)) {
if (cpu_online(cpu))
diff --git a/kernel/softirq.c b/kernel/softirq.c
index a09502e2ef75..7c1a67ef0274 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -500,22 +500,17 @@ EXPORT_SYMBOL(tasklet_kill);
*/
/*
- * The trampoline is called when the hrtimer expires. If this is
- * called from the hrtimer interrupt then we schedule the tasklet as
- * the timer callback function expects to run in softirq context. If
- * it's called in softirq context anyway (i.e. high resolution timers
- * disabled) then the hrtimer callback is called right away.
+ * The trampoline is called when the hrtimer expires. It schedules a tasklet
+ * to run __tasklet_hrtimer_trampoline() which in turn will call the intended
+ * hrtimer callback, but from softirq context.
*/
static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
{
struct tasklet_hrtimer *ttimer =
container_of(timer, struct tasklet_hrtimer, timer);
- if (hrtimer_is_hres_active(timer)) {
- tasklet_hi_schedule(&ttimer->tasklet);
- return HRTIMER_NORESTART;
- }
- return ttimer->function(timer);
+ tasklet_hi_schedule(&ttimer->tasklet);
+ return HRTIMER_NORESTART;
}
/*
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index d22579087e27..0d4c7898ab80 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -25,6 +25,7 @@ static DEFINE_SPINLOCK(print_lock);
static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */
static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */
static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
+static DEFINE_PER_CPU(bool, softlock_touch_sync);
static int __read_mostly did_panic;
int __read_mostly softlockup_thresh = 60;
@@ -79,6 +80,12 @@ void touch_softlockup_watchdog(void)
}
EXPORT_SYMBOL(touch_softlockup_watchdog);
+void touch_softlockup_watchdog_sync(void)
+{
+ __raw_get_cpu_var(softlock_touch_sync) = true;
+ __raw_get_cpu_var(softlockup_touch_ts) = 0;
+}
+
void touch_all_softlockup_watchdogs(void)
{
int cpu;
@@ -118,6 +125,14 @@ void softlockup_tick(void)
}
if (touch_ts == 0) {
+ if (unlikely(per_cpu(softlock_touch_sync, this_cpu))) {
+ /*
+ * If the time stamp was touched atomically
+ * make sure the scheduler tick is up to date.
+ */
+ per_cpu(softlock_touch_sync, this_cpu) = false;
+ sched_clock_tick();
+ }
__touch_softlockup_watchdog();
return;
}
diff --git a/kernel/sys.c b/kernel/sys.c
index 26a6b73a6b85..18bde979f346 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -222,6 +222,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
if (which > PRIO_USER || which < PRIO_PROCESS)
return -EINVAL;
+ rcu_read_lock();
read_lock(&tasklist_lock);
switch (which) {
case PRIO_PROCESS:
@@ -267,6 +268,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
}
out_unlock:
read_unlock(&tasklist_lock);
+ rcu_read_unlock();
return retval;
}
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 6f740d9f0948..d7395fdfb9f3 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -259,7 +259,8 @@ void clockevents_notify(unsigned long reason, void *arg)
cpu = *((int *)arg);
list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) {
if (cpumask_test_cpu(cpu, dev->cpumask) &&
- cpumask_weight(dev->cpumask) == 1) {
+ cpumask_weight(dev->cpumask) == 1 &&
+ !tick_is_broadcast_device(dev)) {
BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
list_del(&dev->list);
}
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index e85c23404d34..13700833c181 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -343,7 +343,19 @@ static void clocksource_resume_watchdog(void)
{
unsigned long flags;
- spin_lock_irqsave(&watchdog_lock, flags);
+ /*
+ * We use trylock here to avoid a potential dead lock when
+ * kgdb calls this code after the kernel has been stopped with
+ * watchdog_lock held. When watchdog_lock is held we just
+ * return and accept, that the watchdog might trigger and mark
+ * the monitored clock source (usually TSC) unstable.
+ *
+ * This does not affect the other caller clocksource_resume()
+ * because at this point the kernel is UP, interrupts are
+ * disabled and nothing can hold watchdog_lock.
+ */
+ if (!spin_trylock_irqsave(&watchdog_lock, flags))
+ return;
clocksource_reset_watchdog();
spin_unlock_irqrestore(&watchdog_lock, flags);
}
@@ -458,8 +470,8 @@ void clocksource_resume(void)
* clocksource_touch_watchdog - Update watchdog
*
* Update the watchdog after exception contexts such as kgdb so as not
- * to incorrectly trip the watchdog.
- *
+ * to incorrectly trip the watchdog. This might fail when the kernel
+ * was stopped in code which holds watchdog_lock.
*/
void clocksource_touch_watchdog(void)
{
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 7faaa32fbf4f..e2ab064c6d41 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -880,6 +880,7 @@ void getboottime(struct timespec *ts)
set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec);
}
+EXPORT_SYMBOL_GPL(getboottime);
/**
* monotonic_to_bootbased - Convert the monotonic time to boot based.
@@ -889,6 +890,7 @@ void monotonic_to_bootbased(struct timespec *ts)
{
*ts = timespec_add_safe(*ts, total_sleep_time);
}
+EXPORT_SYMBOL_GPL(monotonic_to_bootbased);
unsigned long get_seconds(void)
{
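
With getboottime() and monotonic_to_bootbased() now exported GPL, a module can express a monotonic timestamp relative to boot, including time spent suspended. A minimal sketch under the assumption that these helpers (and ktime_get_ts()) are declared in <linux/time.h>; sample_boot_relative_time() is hypothetical and not part of the patch.

#include <linux/time.h>

static void sample_boot_relative_time(struct timespec *out)
{
	ktime_get_ts(out);		/* CLOCK_MONOTONIC timestamp */
	monotonic_to_bootbased(out);	/* add the accumulated sleep time */
}
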
diff --git a/kernel/timer.c b/kernel/timer.c
index 15533b792397..c61a7949387f 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1198,6 +1198,7 @@ void update_process_times(int user_tick)
run_local_timers();
rcu_check_callbacks(cpu, user_tick);
printk_tick();
+ perf_event_do_pending();
scheduler_tick();
run_posix_cpu_timers(p);
}
@@ -1209,8 +1210,6 @@ static void run_timer_softirq(struct softirq_action *h)
{
struct tvec_base *base = __get_cpu_var(tvec_bases);
- perf_event_do_pending();
-
hrtimer_run_pending();
if (time_after_eq(jiffies, base->timer_jiffies))
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 6c22d8a2f289..60e2ce0181ee 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -27,9 +27,7 @@ config HAVE_FUNCTION_GRAPH_TRACER
config HAVE_FUNCTION_GRAPH_FP_TEST
bool
help
- An arch may pass in a unique value (frame pointer) to both the
- entering and exiting of a function. On exit, the value is compared
- and if it does not match, then it will panic the kernel.
+ See Documentation/trace/ftrace-design.txt
config HAVE_FUNCTION_TRACE_MCOUNT_TEST
bool
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 43bec993c864..d996353473fd 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1690,7 +1690,7 @@ ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
static int ftrace_match(char *str, char *regex, int len, int type)
{
int matched = 0;
- char *ptr;
+ int slen;
switch (type) {
case MATCH_FULL:
@@ -1706,8 +1706,8 @@ static int ftrace_match(char *str, char *regex, int len, int type)
matched = 1;
break;
case MATCH_END_ONLY:
- ptr = strstr(str, regex);
- if (ptr && (ptr[len] == 0))
+ slen = strlen(str);
+ if (slen >= len && memcmp(str + slen - len, regex, len) == 0)
matched = 1;
break;
}
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 2326b04c95c4..8c1b2d290718 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -464,6 +464,8 @@ struct ring_buffer_iter {
struct ring_buffer_per_cpu *cpu_buffer;
unsigned long head;
struct buffer_page *head_page;
+ struct buffer_page *cache_reader_page;
+ unsigned long cache_read;
u64 read_stamp;
};
@@ -2716,6 +2718,8 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
iter->read_stamp = cpu_buffer->read_stamp;
else
iter->read_stamp = iter->head_page->page->time_stamp;
+ iter->cache_reader_page = cpu_buffer->reader_page;
+ iter->cache_read = cpu_buffer->read;
}
/**
@@ -2869,7 +2873,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
* Splice the empty reader page into the list around the head.
*/
reader = rb_set_head_page(cpu_buffer);
- cpu_buffer->reader_page->list.next = reader->list.next;
+ cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next);
cpu_buffer->reader_page->list.prev = reader->list.prev;
/*
@@ -2906,7 +2910,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
*
* Now make the new head point back to the reader page.
*/
- reader->list.next->prev = &cpu_buffer->reader_page->list;
+ rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
/* Finally update the reader page to the new head */
@@ -3060,13 +3064,22 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
struct ring_buffer_event *event;
int nr_loops = 0;
- if (ring_buffer_iter_empty(iter))
- return NULL;
-
cpu_buffer = iter->cpu_buffer;
buffer = cpu_buffer->buffer;
+ /*
+ * Check if someone performed a consuming read to
+ * the buffer. A consuming read invalidates the iterator
+ * and we need to reset the iterator in this case.
+ */
+ if (unlikely(iter->cache_read != cpu_buffer->read ||
+ iter->cache_reader_page != cpu_buffer->reader_page))
+ rb_iter_reset(iter);
+
again:
+ if (ring_buffer_iter_empty(iter))
+ return NULL;
+
/*
* We repeat when a timestamp is encountered.
* We can get multiple timestamps by nested interrupts or also
@@ -3081,6 +3094,11 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
if (rb_per_cpu_empty(cpu_buffer))
return NULL;
+ if (iter->head >= local_read(&iter->head_page->page->commit)) {
+ rb_inc_iter(iter);
+ goto again;
+ }
+
event = rb_iter_head_event(iter);
switch (event->type_len) {
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 5314c90bbc83..032c57ca6502 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1028,6 +1028,11 @@ void trace_find_cmdline(int pid, char comm[])
return;
}
+ if (WARN_ON_ONCE(pid < 0)) {
+ strcpy(comm, "<XXX>");
+ return;
+ }
+
if (pid > PID_MAX_DEFAULT) {
strcpy(comm, "<...>");
return;
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 50504cb228de..e42af9aad69f 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -211,8 +211,9 @@ static int filter_pred_pchar(struct filter_pred *pred, void *event,
{
char **addr = (char **)(event + pred->offset);
int cmp, match;
+ int len = strlen(*addr) + 1; /* including tailing '\0' */
- cmp = pred->regex.match(*addr, &pred->regex, pred->regex.field_len);
+ cmp = pred->regex.match(*addr, &pred->regex, len);
match = cmp ^ pred->not;
@@ -251,7 +252,18 @@ static int filter_pred_none(struct filter_pred *pred, void *event,
return 0;
}
-/* Basic regex callbacks */
+/*
+ * regex_match_foo - Basic regex callbacks
+ *
+ * @str: the string to be searched
+ * @r: the regex structure containing the pattern string
+ * @len: the length of the string to be searched (including '\0')
+ *
+ * Note:
+ * - @str might not be NULL-terminated if it's of type DYN_STRING
+ * or STATIC_STRING
+ */
+
static int regex_match_full(char *str, struct regex *r, int len)
{
if (strncmp(str, r->pattern, len) == 0)
@@ -261,23 +273,24 @@ static int regex_match_full(char *str, struct regex *r, int len)
static int regex_match_front(char *str, struct regex *r, int len)
{
- if (strncmp(str, r->pattern, len) == 0)
+ if (strncmp(str, r->pattern, r->len) == 0)
return 1;
return 0;
}
static int regex_match_middle(char *str, struct regex *r, int len)
{
- if (strstr(str, r->pattern))
+ if (strnstr(str, r->pattern, len))
return 1;
return 0;
}
static int regex_match_end(char *str, struct regex *r, int len)
{
- char *ptr = strstr(str, r->pattern);
+ int strlen = len - 1;
- if (ptr && (ptr[r->len] == 0))
+ if (strlen >= r->len &&
+ memcmp(str + strlen - r->len, r->pattern, r->len) == 0)
return 1;
return 0;
}
@@ -781,10 +794,8 @@ static int filter_add_pred(struct filter_parse_state *ps,
pred->regex.field_len = field->size;
} else if (field->filter_type == FILTER_DYN_STRING)
fn = filter_pred_strloc;
- else {
+ else
fn = filter_pred_pchar;
- pred->regex.field_len = strlen(pred->regex.pattern);
- }
} else {
if (field->is_signed)
ret = strict_strtoll(pred->regex.pattern, 0, &val);
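
The old strstr()-based MATCH_END_ONLY test needed a NUL-terminated haystack and could also hit a pattern that is not actually at the end of the field; the new code does a length-bounded suffix compare instead. A small stand-alone illustration of that suffix test (user-space demo, not from the patch; match_end() and the sample buffer are hypothetical):

#include <stdio.h>
#include <string.h>

/* Length-bounded suffix test mirroring the reworked MATCH_END_ONLY logic:
 * no NUL terminator is required and the pattern must sit at the very end. */
static int match_end(const char *str, int len, const char *pattern, int plen)
{
	return len >= plen && memcmp(str + len - plen, pattern, plen) == 0;
}

int main(void)
{
	char field[4] = { 'a', 'b', 'c', 'd' };		/* not NUL-terminated */

	printf("%d\n", match_end(field, 4, "cd", 2));	/* 1: "cd" ends the field */
	printf("%d\n", match_end(field, 4, "bc", 2));	/* 0: "bc" is in the middle */
	return 0;
}
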
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index c99029916c76..53f748b64ef3 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -689,7 +689,7 @@ static int create_trace_probe(int argc, char **argv)
return -EINVAL;
}
/* an address specified */
- ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr);
+ ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr);
if (ret) {
pr_info("Failed to parse address.\n");
return ret;
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 678a5120ee30..f4bc9b27de5f 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -157,6 +157,7 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
unsigned long val, flags;
char buf[64];
int ret;
+ int cpu;
if (count >= sizeof(buf))
return -EINVAL;
@@ -171,9 +172,20 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
return ret;
local_irq_save(flags);
+
+ /*
+ * In case we trace inside arch_spin_lock() or after (NMI),
+ * we will cause circular lock, so we also need to increase
+ * the percpu trace_active here.
+ */
+ cpu = smp_processor_id();
+ per_cpu(trace_active, cpu)++;
+
arch_spin_lock(&max_stack_lock);
*ptr = val;
arch_spin_unlock(&max_stack_lock);
+
+ per_cpu(trace_active, cpu)--;
local_irq_restore(flags);
return count;
@@ -206,7 +218,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
static void *t_start(struct seq_file *m, loff_t *pos)
{
+ int cpu;
+
local_irq_disable();
+
+ cpu = smp_processor_id();
+ per_cpu(trace_active, cpu)++;
+
arch_spin_lock(&max_stack_lock);
if (*pos == 0)
@@ -217,7 +235,13 @@ static void *t_start(struct seq_file *m, loff_t *pos)
static void t_stop(struct seq_file *m, void *p)
{
+ int cpu;
+
arch_spin_unlock(&max_stack_lock);
+
+ cpu = smp_processor_id();
+ per_cpu(trace_active, cpu)--;
+
local_irq_enable();
}