summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup_freezer.c9
-rw-r--r--kernel/cred.c6
-rw-r--r--kernel/early_res.c6
-rw-r--r--kernel/irq/chip.c35
-rw-r--r--kernel/irq/manage.c22
-rw-r--r--kernel/kgdb.c205
-rw-r--r--kernel/perf_event.c22
-rw-r--r--kernel/posix-cpu-timers.c10
-rw-r--r--kernel/power/process.c5
-rw-r--r--kernel/rcupdate.c23
-rw-r--r--kernel/resource.c44
-rw-r--r--kernel/sched.c14
-rw-r--r--kernel/sched_debug.c4
-rw-r--r--kernel/slow-work.c2
-rw-r--r--kernel/slow-work.h8
-rw-r--r--kernel/softlockup.c4
-rw-r--r--kernel/time/tick-oneshot.c52
-rw-r--r--kernel/time/timekeeping.c3
-rw-r--r--kernel/time/timer_list.c3
-rw-r--r--kernel/timer.c1
-rw-r--r--kernel/trace/ring_buffer.c22
-rw-r--r--kernel/trace/trace_clock.c4
-rw-r--r--kernel/trace/trace_event_perf.c11
23 files changed, 337 insertions, 178 deletions
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 59e9ef6aab40..eb3f34d57419 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -47,17 +47,20 @@ static inline struct freezer *task_freezer(struct task_struct *task)
struct freezer, css);
}
-int cgroup_frozen(struct task_struct *task)
+int cgroup_freezing_or_frozen(struct task_struct *task)
{
struct freezer *freezer;
enum freezer_state state;
task_lock(task);
freezer = task_freezer(task);
- state = freezer->state;
+ if (!freezer->css.cgroup->parent)
+ state = CGROUP_THAWED; /* root cgroup can't be frozen */
+ else
+ state = freezer->state;
task_unlock(task);
- return state == CGROUP_FROZEN;
+ return (state == CGROUP_FREEZING) || (state == CGROUP_FROZEN);
}
/*
diff --git a/kernel/cred.c b/kernel/cred.c
index 1ed8ca18790c..1b1129d0cce8 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -364,7 +364,7 @@ struct cred *prepare_usermodehelper_creds(void)
new = kmem_cache_alloc(cred_jar, GFP_ATOMIC);
if (!new)
- return NULL;
+ goto free_tgcred;
kdebug("prepare_usermodehelper_creds() alloc %p", new);
@@ -397,6 +397,10 @@ struct cred *prepare_usermodehelper_creds(void)
error:
put_cred(new);
+free_tgcred:
+#ifdef CONFIG_KEYS
+ kfree(tgcred);
+#endif
return NULL;
}
diff --git a/kernel/early_res.c b/kernel/early_res.c
index 3cb2c661bb78..31aa9332ef3f 100644
--- a/kernel/early_res.c
+++ b/kernel/early_res.c
@@ -333,6 +333,12 @@ void __init free_early_partial(u64 start, u64 end)
struct early_res *r;
int i;
+ if (start == end)
+ return;
+
+ if (WARN_ONCE(start > end, " wrong range [%#llx, %#llx]\n", start, end))
+ return;
+
try_next:
i = find_overlapped_early(start, end);
if (i >= max_early_res)
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 42ec11b2af8a..b7091d5ca2f8 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -359,6 +359,23 @@ static inline void mask_ack_irq(struct irq_desc *desc, int irq)
if (desc->chip->ack)
desc->chip->ack(irq);
}
+ desc->status |= IRQ_MASKED;
+}
+
+static inline void mask_irq(struct irq_desc *desc, int irq)
+{
+ if (desc->chip->mask) {
+ desc->chip->mask(irq);
+ desc->status |= IRQ_MASKED;
+ }
+}
+
+static inline void unmask_irq(struct irq_desc *desc, int irq)
+{
+ if (desc->chip->unmask) {
+ desc->chip->unmask(irq);
+ desc->status &= ~IRQ_MASKED;
+ }
}
/*
@@ -484,10 +501,8 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
raw_spin_lock(&desc->lock);
desc->status &= ~IRQ_INPROGRESS;
- if (unlikely(desc->status & IRQ_ONESHOT))
- desc->status |= IRQ_MASKED;
- else if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask)
- desc->chip->unmask(irq);
+ if (!(desc->status & (IRQ_DISABLED | IRQ_ONESHOT)))
+ unmask_irq(desc, irq);
out_unlock:
raw_spin_unlock(&desc->lock);
}
@@ -524,8 +539,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
action = desc->action;
if (unlikely(!action || (desc->status & IRQ_DISABLED))) {
desc->status |= IRQ_PENDING;
- if (desc->chip->mask)
- desc->chip->mask(irq);
+ mask_irq(desc, irq);
goto out;
}
@@ -593,7 +607,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
irqreturn_t action_ret;
if (unlikely(!action)) {
- desc->chip->mask(irq);
+ mask_irq(desc, irq);
goto out_unlock;
}
@@ -605,8 +619,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
if (unlikely((desc->status &
(IRQ_PENDING | IRQ_MASKED | IRQ_DISABLED)) ==
(IRQ_PENDING | IRQ_MASKED))) {
- desc->chip->unmask(irq);
- desc->status &= ~IRQ_MASKED;
+ unmask_irq(desc, irq);
}
desc->status &= ~IRQ_PENDING;
@@ -716,7 +729,7 @@ set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
__set_irq_handler(irq, handle, 0, name);
}
-void __init set_irq_noprobe(unsigned int irq)
+void set_irq_noprobe(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
@@ -731,7 +744,7 @@ void __init set_irq_noprobe(unsigned int irq)
raw_spin_unlock_irqrestore(&desc->lock, flags);
}
-void __init set_irq_probe(unsigned int irq)
+void set_irq_probe(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index eb6078ca60c7..398fda155f6e 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -382,6 +382,7 @@ int can_request_irq(unsigned int irq, unsigned long irqflags)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irqaction *action;
+ unsigned long flags;
if (!desc)
return 0;
@@ -389,11 +390,14 @@ int can_request_irq(unsigned int irq, unsigned long irqflags)
if (desc->status & IRQ_NOREQUEST)
return 0;
+ raw_spin_lock_irqsave(&desc->lock, flags);
action = desc->action;
if (action)
if (irqflags & action->flags & IRQF_SHARED)
action = NULL;
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+
return !action;
}
@@ -483,8 +487,26 @@ static int irq_wait_for_interrupt(struct irqaction *action)
*/
static void irq_finalize_oneshot(unsigned int irq, struct irq_desc *desc)
{
+again:
chip_bus_lock(irq, desc);
raw_spin_lock_irq(&desc->lock);
+
+ /*
+ * Implausible though it may be we need to protect us against
+ * the following scenario:
+ *
+ * The thread is faster done than the hard interrupt handler
+ * on the other CPU. If we unmask the irq line then the
+ * interrupt can come in again and masks the line, leaves due
+ * to IRQ_INPROGRESS and the irq line is masked forever.
+ */
+ if (unlikely(desc->status & IRQ_INPROGRESS)) {
+ raw_spin_unlock_irq(&desc->lock);
+ chip_bus_sync_unlock(irq, desc);
+ cpu_relax();
+ goto again;
+ }
+
if (!(desc->status & IRQ_DISABLED) && (desc->status & IRQ_MASKED)) {
desc->status &= ~IRQ_MASKED;
desc->chip->unmask(irq);
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 761fdd2b3034..11f3515ca83f 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -69,9 +69,16 @@ struct kgdb_state {
struct pt_regs *linux_regs;
};
+/* Exception state values */
+#define DCPU_WANT_MASTER 0x1 /* Waiting to become a master kgdb cpu */
+#define DCPU_NEXT_MASTER 0x2 /* Transition from one master cpu to another */
+#define DCPU_IS_SLAVE 0x4 /* Slave cpu enter exception */
+#define DCPU_SSTEP 0x8 /* CPU is single stepping */
+
static struct debuggerinfo_struct {
void *debuggerinfo;
struct task_struct *task;
+ int exception_state;
} kgdb_info[NR_CPUS];
/**
@@ -391,27 +398,22 @@ int kgdb_mem2hex(char *mem, char *buf, int count)
/*
* Copy the binary array pointed to by buf into mem. Fix $, #, and
- * 0x7d escaped with 0x7d. Return a pointer to the character after
- * the last byte written.
+ * 0x7d escaped with 0x7d. Return -EFAULT on failure or 0 on success.
+ * The input buf is overwitten with the result to write to mem.
*/
static int kgdb_ebin2mem(char *buf, char *mem, int count)
{
- int err = 0;
- char c;
+ int size = 0;
+ char *c = buf;
while (count-- > 0) {
- c = *buf++;
- if (c == 0x7d)
- c = *buf++ ^ 0x20;
-
- err = probe_kernel_write(mem, &c, 1);
- if (err)
- break;
-
- mem++;
+ c[size] = *buf++;
+ if (c[size] == 0x7d)
+ c[size] = *buf++ ^ 0x20;
+ size++;
}
- return err;
+ return probe_kernel_write(mem, c, size);
}
/*
@@ -563,49 +565,6 @@ static struct task_struct *getthread(struct pt_regs *regs, int tid)
}
/*
- * CPU debug state control:
- */
-
-#ifdef CONFIG_SMP
-static void kgdb_wait(struct pt_regs *regs)
-{
- unsigned long flags;
- int cpu;
-
- local_irq_save(flags);
- cpu = raw_smp_processor_id();
- kgdb_info[cpu].debuggerinfo = regs;
- kgdb_info[cpu].task = current;
- /*
- * Make sure the above info reaches the primary CPU before
- * our cpu_in_kgdb[] flag setting does:
- */
- smp_wmb();
- atomic_set(&cpu_in_kgdb[cpu], 1);
-
- /* Disable any cpu specific hw breakpoints */
- kgdb_disable_hw_debug(regs);
-
- /* Wait till primary CPU is done with debugging */
- while (atomic_read(&passive_cpu_wait[cpu]))
- cpu_relax();
-
- kgdb_info[cpu].debuggerinfo = NULL;
- kgdb_info[cpu].task = NULL;
-
- /* fix up hardware debug registers on local cpu */
- if (arch_kgdb_ops.correct_hw_break)
- arch_kgdb_ops.correct_hw_break();
-
- /* Signal the primary CPU that we are done: */
- atomic_set(&cpu_in_kgdb[cpu], 0);
- touch_softlockup_watchdog_sync();
- clocksource_touch_watchdog();
- local_irq_restore(flags);
-}
-#endif
-
-/*
* Some architectures need cache flushes when we set/clear a
* breakpoint:
*/
@@ -1400,34 +1359,13 @@ static int kgdb_reenter_check(struct kgdb_state *ks)
return 1;
}
-/*
- * kgdb_handle_exception() - main entry point from a kernel exception
- *
- * Locking hierarchy:
- * interface locks, if any (begin_session)
- * kgdb lock (kgdb_active)
- */
-int
-kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
+static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs)
{
- struct kgdb_state kgdb_var;
- struct kgdb_state *ks = &kgdb_var;
unsigned long flags;
int sstep_tries = 100;
int error = 0;
int i, cpu;
-
- ks->cpu = raw_smp_processor_id();
- ks->ex_vector = evector;
- ks->signo = signo;
- ks->ex_vector = evector;
- ks->err_code = ecode;
- ks->kgdb_usethreadid = 0;
- ks->linux_regs = regs;
-
- if (kgdb_reenter_check(ks))
- return 0; /* Ouch, double exception ! */
-
+ int trace_on = 0;
acquirelock:
/*
* Interrupts will be restored by the 'trap return' code, except when
@@ -1435,13 +1373,43 @@ acquirelock:
*/
local_irq_save(flags);
- cpu = raw_smp_processor_id();
+ cpu = ks->cpu;
+ kgdb_info[cpu].debuggerinfo = regs;
+ kgdb_info[cpu].task = current;
+ /*
+ * Make sure the above info reaches the primary CPU before
+ * our cpu_in_kgdb[] flag setting does:
+ */
+ atomic_inc(&cpu_in_kgdb[cpu]);
/*
- * Acquire the kgdb_active lock:
+ * CPU will loop if it is a slave or request to become a kgdb
+ * master cpu and acquire the kgdb_active lock:
*/
- while (atomic_cmpxchg(&kgdb_active, -1, cpu) != -1)
+ while (1) {
+ if (kgdb_info[cpu].exception_state & DCPU_WANT_MASTER) {
+ if (atomic_cmpxchg(&kgdb_active, -1, cpu) == cpu)
+ break;
+ } else if (kgdb_info[cpu].exception_state & DCPU_IS_SLAVE) {
+ if (!atomic_read(&passive_cpu_wait[cpu]))
+ goto return_normal;
+ } else {
+return_normal:
+ /* Return to normal operation by executing any
+ * hw breakpoint fixup.
+ */
+ if (arch_kgdb_ops.correct_hw_break)
+ arch_kgdb_ops.correct_hw_break();
+ if (trace_on)
+ tracing_on();
+ atomic_dec(&cpu_in_kgdb[cpu]);
+ touch_softlockup_watchdog_sync();
+ clocksource_touch_watchdog();
+ local_irq_restore(flags);
+ return 0;
+ }
cpu_relax();
+ }
/*
* For single stepping, try to only enter on the processor
@@ -1475,9 +1443,6 @@ acquirelock:
if (kgdb_io_ops->pre_exception)
kgdb_io_ops->pre_exception();
- kgdb_info[ks->cpu].debuggerinfo = ks->linux_regs;
- kgdb_info[ks->cpu].task = current;
-
kgdb_disable_hw_debug(ks->linux_regs);
/*
@@ -1486,15 +1451,9 @@ acquirelock:
*/
if (!kgdb_single_step) {
for (i = 0; i < NR_CPUS; i++)
- atomic_set(&passive_cpu_wait[i], 1);
+ atomic_inc(&passive_cpu_wait[i]);
}
- /*
- * spin_lock code is good enough as a barrier so we don't
- * need one here:
- */
- atomic_set(&cpu_in_kgdb[ks->cpu], 1);
-
#ifdef CONFIG_SMP
/* Signal the other CPUs to enter kgdb_wait() */
if ((!kgdb_single_step) && kgdb_do_roundup)
@@ -1518,6 +1477,9 @@ acquirelock:
kgdb_single_step = 0;
kgdb_contthread = current;
exception_level = 0;
+ trace_on = tracing_is_on();
+ if (trace_on)
+ tracing_off();
/* Talk to debugger with gdbserial protocol */
error = gdb_serial_stub(ks);
@@ -1526,13 +1488,11 @@ acquirelock:
if (kgdb_io_ops->post_exception)
kgdb_io_ops->post_exception();
- kgdb_info[ks->cpu].debuggerinfo = NULL;
- kgdb_info[ks->cpu].task = NULL;
- atomic_set(&cpu_in_kgdb[ks->cpu], 0);
+ atomic_dec(&cpu_in_kgdb[ks->cpu]);
if (!kgdb_single_step) {
for (i = NR_CPUS-1; i >= 0; i--)
- atomic_set(&passive_cpu_wait[i], 0);
+ atomic_dec(&passive_cpu_wait[i]);
/*
* Wait till all the CPUs have quit
* from the debugger.
@@ -1551,6 +1511,8 @@ kgdb_restore:
else
kgdb_sstep_pid = 0;
}
+ if (trace_on)
+ tracing_on();
/* Free kgdb_active */
atomic_set(&kgdb_active, -1);
touch_softlockup_watchdog_sync();
@@ -1560,13 +1522,52 @@ kgdb_restore:
return error;
}
+/*
+ * kgdb_handle_exception() - main entry point from a kernel exception
+ *
+ * Locking hierarchy:
+ * interface locks, if any (begin_session)
+ * kgdb lock (kgdb_active)
+ */
+int
+kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
+{
+ struct kgdb_state kgdb_var;
+ struct kgdb_state *ks = &kgdb_var;
+ int ret;
+
+ ks->cpu = raw_smp_processor_id();
+ ks->ex_vector = evector;
+ ks->signo = signo;
+ ks->ex_vector = evector;
+ ks->err_code = ecode;
+ ks->kgdb_usethreadid = 0;
+ ks->linux_regs = regs;
+
+ if (kgdb_reenter_check(ks))
+ return 0; /* Ouch, double exception ! */
+ kgdb_info[ks->cpu].exception_state |= DCPU_WANT_MASTER;
+ ret = kgdb_cpu_enter(ks, regs);
+ kgdb_info[ks->cpu].exception_state &= ~DCPU_WANT_MASTER;
+ return ret;
+}
+
int kgdb_nmicallback(int cpu, void *regs)
{
#ifdef CONFIG_SMP
+ struct kgdb_state kgdb_var;
+ struct kgdb_state *ks = &kgdb_var;
+
+ memset(ks, 0, sizeof(struct kgdb_state));
+ ks->cpu = cpu;
+ ks->linux_regs = regs;
+
if (!atomic_read(&cpu_in_kgdb[cpu]) &&
- atomic_read(&kgdb_active) != cpu &&
- atomic_read(&cpu_in_kgdb[atomic_read(&kgdb_active)])) {
- kgdb_wait((struct pt_regs *)regs);
+ atomic_read(&kgdb_active) != -1 &&
+ atomic_read(&kgdb_active) != cpu) {
+ kgdb_info[cpu].exception_state |= DCPU_IS_SLAVE;
+ kgdb_cpu_enter(ks, regs);
+ kgdb_info[cpu].exception_state &= ~DCPU_IS_SLAVE;
return 0;
}
#endif
@@ -1742,11 +1743,11 @@ EXPORT_SYMBOL_GPL(kgdb_unregister_io_module);
*/
void kgdb_breakpoint(void)
{
- atomic_set(&kgdb_setting_breakpoint, 1);
+ atomic_inc(&kgdb_setting_breakpoint);
wmb(); /* Sync point before breakpoint */
arch_kgdb_breakpoint();
wmb(); /* Sync point after breakpoint */
- atomic_set(&kgdb_setting_breakpoint, 0);
+ atomic_dec(&kgdb_setting_breakpoint);
}
EXPORT_SYMBOL_GPL(kgdb_breakpoint);
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 574ee58a3046..681af806d76b 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1164,11 +1164,9 @@ void perf_event_task_sched_out(struct task_struct *task,
struct perf_event_context *ctx = task->perf_event_ctxp;
struct perf_event_context *next_ctx;
struct perf_event_context *parent;
- struct pt_regs *regs;
int do_switch = 1;
- regs = task_pt_regs(task);
- perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, regs, 0);
+ perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
if (likely(!ctx || !cpuctx->task_ctx))
return;
@@ -2786,12 +2784,11 @@ __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
return NULL;
}
-#ifdef CONFIG_EVENT_TRACING
__weak
void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
{
}
-#endif
+
/*
* Output
@@ -3378,15 +3375,23 @@ static void perf_event_task_output(struct perf_event *event,
struct perf_task_event *task_event)
{
struct perf_output_handle handle;
- int size;
struct task_struct *task = task_event->task;
- int ret;
+ unsigned long flags;
+ int size, ret;
+
+ /*
+ * If this CPU attempts to acquire an rq lock held by a CPU spinning
+ * in perf_output_lock() from interrupt context, it's game over.
+ */
+ local_irq_save(flags);
size = task_event->event_id.header.size;
ret = perf_output_begin(&handle, event, size, 0, 0);
- if (ret)
+ if (ret) {
+ local_irq_restore(flags);
return;
+ }
task_event->event_id.pid = perf_event_pid(event, task);
task_event->event_id.ppid = perf_event_pid(event, current);
@@ -3397,6 +3402,7 @@ static void perf_event_task_output(struct perf_event *event,
perf_output_put(&handle, task_event->event_id);
perf_output_end(&handle);
+ local_irq_restore(flags);
}
static int perf_event_task_match(struct perf_event *event)
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 1a22dfd42df9..bc7704b3a443 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -1061,9 +1061,9 @@ static void check_thread_timers(struct task_struct *tsk,
}
}
-static void stop_process_timers(struct task_struct *tsk)
+static void stop_process_timers(struct signal_struct *sig)
{
- struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
+ struct thread_group_cputimer *cputimer = &sig->cputimer;
unsigned long flags;
if (!cputimer->running)
@@ -1072,6 +1072,10 @@ static void stop_process_timers(struct task_struct *tsk)
spin_lock_irqsave(&cputimer->lock, flags);
cputimer->running = 0;
spin_unlock_irqrestore(&cputimer->lock, flags);
+
+ sig->cputime_expires.prof_exp = cputime_zero;
+ sig->cputime_expires.virt_exp = cputime_zero;
+ sig->cputime_expires.sched_exp = 0;
}
static u32 onecputick;
@@ -1133,7 +1137,7 @@ static void check_process_timers(struct task_struct *tsk,
list_empty(&timers[CPUCLOCK_VIRT]) &&
cputime_eq(sig->it[CPUCLOCK_VIRT].expires, cputime_zero) &&
list_empty(&timers[CPUCLOCK_SCHED])) {
- stop_process_timers(tsk);
+ stop_process_timers(sig);
return;
}
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 5ade1bdcf366..71ae29052ab6 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -88,12 +88,11 @@ static int try_to_freeze_tasks(bool sig_only)
printk(KERN_ERR "Freezing of tasks failed after %d.%02d seconds "
"(%d tasks refusing to freeze):\n",
elapsed_csecs / 100, elapsed_csecs % 100, todo);
- show_state();
read_lock(&tasklist_lock);
do_each_thread(g, p) {
task_lock(p);
if (freezing(p) && !freezer_should_skip(p))
- printk(KERN_ERR " %s\n", p->comm);
+ sched_show_task(p);
cancel_freezing(p);
task_unlock(p);
} while_each_thread(g, p);
@@ -145,7 +144,7 @@ static void thaw_tasks(bool nosig_only)
if (nosig_only && should_send_signal(p))
continue;
- if (cgroup_frozen(p))
+ if (cgroup_freezing_or_frozen(p))
continue;
thaw_process(p);
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index f1125c1a6321..63fe25433980 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -45,6 +45,7 @@
#include <linux/mutex.h>
#include <linux/module.h>
#include <linux/kernel_stat.h>
+#include <linux/hardirq.h>
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key rcu_lock_key;
@@ -66,6 +67,28 @@ EXPORT_SYMBOL_GPL(rcu_sched_lock_map);
int rcu_scheduler_active __read_mostly;
EXPORT_SYMBOL_GPL(rcu_scheduler_active);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+/**
+ * rcu_read_lock_bh_held - might we be in RCU-bh read-side critical section?
+ *
+ * Check for bottom half being disabled, which covers both the
+ * CONFIG_PROVE_RCU and not cases. Note that if someone uses
+ * rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled)
+ * will show the situation.
+ *
+ * Check debug_lockdep_rcu_enabled() to prevent false positives during boot.
+ */
+int rcu_read_lock_bh_held(void)
+{
+ if (!debug_lockdep_rcu_enabled())
+ return 1;
+ return in_softirq();
+}
+EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
+
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
/*
* This function is invoked towards the end of the scheduler's initialization
* process. Before this is called, the idle task might contain
diff --git a/kernel/resource.c b/kernel/resource.c
index 2d5be5d9bf5f..9c358e263534 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -219,19 +219,34 @@ void release_child_resources(struct resource *r)
}
/**
- * request_resource - request and reserve an I/O or memory resource
+ * request_resource_conflict - request and reserve an I/O or memory resource
* @root: root resource descriptor
* @new: resource descriptor desired by caller
*
- * Returns 0 for success, negative error code on error.
+ * Returns 0 for success, conflict resource on error.
*/
-int request_resource(struct resource *root, struct resource *new)
+struct resource *request_resource_conflict(struct resource *root, struct resource *new)
{
struct resource *conflict;
write_lock(&resource_lock);
conflict = __request_resource(root, new);
write_unlock(&resource_lock);
+ return conflict;
+}
+
+/**
+ * request_resource - request and reserve an I/O or memory resource
+ * @root: root resource descriptor
+ * @new: resource descriptor desired by caller
+ *
+ * Returns 0 for success, negative error code on error.
+ */
+int request_resource(struct resource *root, struct resource *new)
+{
+ struct resource *conflict;
+
+ conflict = request_resource_conflict(root, new);
return conflict ? -EBUSY : 0;
}
@@ -474,25 +489,40 @@ static struct resource * __insert_resource(struct resource *parent, struct resou
}
/**
- * insert_resource - Inserts a resource in the resource tree
+ * insert_resource_conflict - Inserts resource in the resource tree
* @parent: parent of the new resource
* @new: new resource to insert
*
- * Returns 0 on success, -EBUSY if the resource can't be inserted.
+ * Returns 0 on success, conflict resource if the resource can't be inserted.
*
- * This function is equivalent to request_resource when no conflict
+ * This function is equivalent to request_resource_conflict when no conflict
* happens. If a conflict happens, and the conflicting resources
* entirely fit within the range of the new resource, then the new
* resource is inserted and the conflicting resources become children of
* the new resource.
*/
-int insert_resource(struct resource *parent, struct resource *new)
+struct resource *insert_resource_conflict(struct resource *parent, struct resource *new)
{
struct resource *conflict;
write_lock(&resource_lock);
conflict = __insert_resource(parent, new);
write_unlock(&resource_lock);
+ return conflict;
+}
+
+/**
+ * insert_resource - Inserts a resource in the resource tree
+ * @parent: parent of the new resource
+ * @new: new resource to insert
+ *
+ * Returns 0 on success, -EBUSY if the resource can't be inserted.
+ */
+int insert_resource(struct resource *parent, struct resource *new)
+{
+ struct resource *conflict;
+
+ conflict = insert_resource_conflict(parent, new);
return conflict ? -EBUSY : 0;
}
diff --git a/kernel/sched.c b/kernel/sched.c
index 9ab3cd7858d3..528a10592c16 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2650,7 +2650,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
{
unsigned long flags;
struct rq *rq;
- int cpu = get_cpu();
+ int cpu __maybe_unused = get_cpu();
#ifdef CONFIG_SMP
/*
@@ -4902,7 +4902,9 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
int ret;
cpumask_var_t mask;
- if (len < cpumask_size())
+ if (len < nr_cpu_ids)
+ return -EINVAL;
+ if (len & (sizeof(unsigned long)-1))
return -EINVAL;
if (!alloc_cpumask_var(&mask, GFP_KERNEL))
@@ -4910,10 +4912,12 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
ret = sched_getaffinity(pid, mask);
if (ret == 0) {
- if (copy_to_user(user_mask_ptr, mask, cpumask_size()))
+ size_t retlen = min_t(size_t, len, cpumask_size());
+
+ if (copy_to_user(user_mask_ptr, mask, retlen))
ret = -EFAULT;
else
- ret = cpumask_size();
+ ret = retlen;
}
free_cpumask_var(mask);
@@ -5383,7 +5387,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
get_task_struct(mt);
task_rq_unlock(rq, &flags);
- wake_up_process(rq->migration_thread);
+ wake_up_process(mt);
put_task_struct(mt);
wait_for_completion(&req.done);
tlb_migrate_finish(p->mm);
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 67f95aada4b9..9b49db144037 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -518,8 +518,4 @@ void proc_sched_set_task(struct task_struct *p)
p->se.nr_wakeups_idle = 0;
p->sched_info.bkl_count = 0;
#endif
- p->se.sum_exec_runtime = 0;
- p->se.prev_sum_exec_runtime = 0;
- p->nvcsw = 0;
- p->nivcsw = 0;
}
diff --git a/kernel/slow-work.c b/kernel/slow-work.c
index 7494bbf5a270..7d3f4fa9ef4f 100644
--- a/kernel/slow-work.c
+++ b/kernel/slow-work.c
@@ -637,7 +637,7 @@ int delayed_slow_work_enqueue(struct delayed_slow_work *dwork,
goto cancelled;
/* the timer holds a reference whilst it is pending */
- ret = work->ops->get_ref(work);
+ ret = slow_work_get_ref(work);
if (ret < 0)
goto cant_get_ref;
diff --git a/kernel/slow-work.h b/kernel/slow-work.h
index 321f3c59d732..a29ebd1ef41d 100644
--- a/kernel/slow-work.h
+++ b/kernel/slow-work.h
@@ -43,28 +43,28 @@ extern void slow_work_new_thread_desc(struct slow_work *, struct seq_file *);
*/
static inline void slow_work_set_thread_pid(int id, pid_t pid)
{
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
slow_work_pids[id] = pid;
#endif
}
static inline void slow_work_mark_time(struct slow_work *work)
{
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
work->mark = CURRENT_TIME;
#endif
}
static inline void slow_work_begin_exec(int id, struct slow_work *work)
{
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
slow_work_execs[id] = work;
#endif
}
static inline void slow_work_end_exec(int id, struct slow_work *work)
{
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
write_lock(&slow_work_execs_lock);
slow_work_execs[id] = NULL;
write_unlock(&slow_work_execs_lock);
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 0d4c7898ab80..4b493f67dcb5 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -155,11 +155,11 @@ void softlockup_tick(void)
* Wake up the high-prio watchdog task twice per
* threshold timespan.
*/
- if (now > touch_ts + softlockup_thresh/2)
+ if (time_after(now - softlockup_thresh/2, touch_ts))
wake_up_process(per_cpu(softlockup_watchdog, this_cpu));
/* Warn about unreasonable delays: */
- if (now <= (touch_ts + softlockup_thresh))
+ if (time_before_eq(now - softlockup_thresh, touch_ts))
return;
per_cpu(softlockup_print_ts, this_cpu) = touch_ts;
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index 0a8a213016f0..aada0e52680a 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -22,6 +22,29 @@
#include "tick-internal.h"
+/* Limit min_delta to a jiffie */
+#define MIN_DELTA_LIMIT (NSEC_PER_SEC / HZ)
+
+static int tick_increase_min_delta(struct clock_event_device *dev)
+{
+ /* Nothing to do if we already reached the limit */
+ if (dev->min_delta_ns >= MIN_DELTA_LIMIT)
+ return -ETIME;
+
+ if (dev->min_delta_ns < 5000)
+ dev->min_delta_ns = 5000;
+ else
+ dev->min_delta_ns += dev->min_delta_ns >> 1;
+
+ if (dev->min_delta_ns > MIN_DELTA_LIMIT)
+ dev->min_delta_ns = MIN_DELTA_LIMIT;
+
+ printk(KERN_WARNING "CE: %s increased min_delta_ns to %llu nsec\n",
+ dev->name ? dev->name : "?",
+ (unsigned long long) dev->min_delta_ns);
+ return 0;
+}
+
/**
* tick_program_event internal worker function
*/
@@ -37,23 +60,28 @@ int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires,
if (!ret || !force)
return ret;
+ dev->retries++;
/*
- * We tried 2 times to program the device with the given
- * min_delta_ns. If that's not working then we double it
+ * We tried 3 times to program the device with the given
+ * min_delta_ns. If that's not working then we increase it
* and emit a warning.
*/
if (++i > 2) {
/* Increase the min. delta and try again */
- if (!dev->min_delta_ns)
- dev->min_delta_ns = 5000;
- else
- dev->min_delta_ns += dev->min_delta_ns >> 1;
-
- printk(KERN_WARNING
- "CE: %s increasing min_delta_ns to %llu nsec\n",
- dev->name ? dev->name : "?",
- (unsigned long long) dev->min_delta_ns << 1);
-
+ if (tick_increase_min_delta(dev)) {
+ /*
+ * Get out of the loop if min_delta_ns
+ * hit the limit already. That's
+ * better than staying here forever.
+ *
+ * We clear next_event so we have a
+ * chance that the box survives.
+ */
+ printk(KERN_WARNING
+ "CE: Reprogramming failure. Giving up\n");
+ dev->next_event.tv64 = KTIME_MAX;
+ return -ETIME;
+ }
i = 0;
}
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 16736379a9ca..39f6177fafac 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -818,7 +818,8 @@ void update_wall_time(void)
shift = min(shift, maxshift);
while (offset >= timekeeper.cycle_interval) {
offset = logarithmic_accumulation(offset, shift);
- shift--;
+ if(offset < timekeeper.cycle_interval<<shift)
+ shift--;
}
/* correct the clock when NTP error is too big */
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index bdfb8dd1050c..1a4a7dd78777 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -228,6 +228,7 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
SEQ_printf(m, " event_handler: ");
print_name_offset(m, dev->event_handler);
SEQ_printf(m, "\n");
+ SEQ_printf(m, " retries: %lu\n", dev->retries);
}
static void timer_list_show_tickdevices(struct seq_file *m)
@@ -257,7 +258,7 @@ static int timer_list_show(struct seq_file *m, void *v)
u64 now = ktime_to_ns(ktime_get());
int cpu;
- SEQ_printf(m, "Timer List Version: v0.5\n");
+ SEQ_printf(m, "Timer List Version: v0.6\n");
SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
diff --git a/kernel/timer.c b/kernel/timer.c
index c61a7949387f..fc965eae0e87 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -880,6 +880,7 @@ int try_to_del_timer_sync(struct timer_list *timer)
if (base->running_timer == timer)
goto out;
+ timer_stats_timer_clear_start_info(timer);
ret = 0;
if (timer_pending(timer)) {
detach_timer(timer, 1);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 05a9f83b8819..9a0f9bf6a37b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -207,6 +207,14 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */
+#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+# define RB_FORCE_8BYTE_ALIGNMENT 0
+# define RB_ARCH_ALIGNMENT RB_ALIGNMENT
+#else
+# define RB_FORCE_8BYTE_ALIGNMENT 1
+# define RB_ARCH_ALIGNMENT 8U
+#endif
+
/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
@@ -1201,18 +1209,19 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
for (i = 0; i < nr_pages; i++) {
if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
- return;
+ goto out;
p = cpu_buffer->pages->next;
bpage = list_entry(p, struct buffer_page, list);
list_del_init(&bpage->list);
free_buffer_page(bpage);
}
if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
- return;
+ goto out;
rb_reset_cpu(cpu_buffer);
rb_check_pages(cpu_buffer);
+out:
spin_unlock_irq(&cpu_buffer->reader_lock);
}
@@ -1229,7 +1238,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
for (i = 0; i < nr_pages; i++) {
if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
- return;
+ goto out;
p = pages->next;
bpage = list_entry(p, struct buffer_page, list);
list_del_init(&bpage->list);
@@ -1238,6 +1247,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
rb_reset_cpu(cpu_buffer);
rb_check_pages(cpu_buffer);
+out:
spin_unlock_irq(&cpu_buffer->reader_lock);
}
@@ -1547,7 +1557,7 @@ rb_update_event(struct ring_buffer_event *event,
case 0:
length -= RB_EVNT_HDR_SIZE;
- if (length > RB_MAX_SMALL_DATA)
+ if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
event->array[0] = length;
else
event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
@@ -1722,11 +1732,11 @@ static unsigned rb_calculate_event_length(unsigned length)
if (!length)
length = 1;
- if (length > RB_MAX_SMALL_DATA)
+ if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
length += sizeof(event.array[0]);
length += RB_EVNT_HDR_SIZE;
- length = ALIGN(length, RB_ALIGNMENT);
+ length = ALIGN(length, RB_ARCH_ALIGNMENT);
return length;
}
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 6fbfb8f417b9..9d589d8dcd1a 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -84,7 +84,7 @@ u64 notrace trace_clock_global(void)
int this_cpu;
u64 now;
- raw_local_irq_save(flags);
+ local_irq_save(flags);
this_cpu = raw_smp_processor_id();
now = cpu_clock(this_cpu);
@@ -110,7 +110,7 @@ u64 notrace trace_clock_global(void)
arch_spin_unlock(&trace_clock_struct.lock);
out:
- raw_local_irq_restore(flags);
+ local_irq_restore(flags);
return now;
}
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 81f691eb3a30..0565bb42566f 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -17,7 +17,12 @@ EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);
static char *perf_trace_buf;
static char *perf_trace_buf_nmi;
-typedef typeof(char [PERF_MAX_TRACE_SIZE]) perf_trace_t ;
+/*
+ * Force it to be aligned to unsigned long to avoid misaligned accesses
+ * suprises
+ */
+typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
+ perf_trace_t;
/* Count the events in use (per event id, not per instance) */
static int total_ref_count;
@@ -130,6 +135,8 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
char *trace_buf, *raw_data;
int pc, cpu;
+ BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
+
pc = preempt_count();
/* Protect the per cpu buffer, begin the rcu read side */
@@ -152,7 +159,7 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
raw_data = per_cpu_ptr(trace_buf, cpu);
/* zero the dead bytes from align to not leak stack to user */
- *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+ memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
entry = (struct trace_entry *)raw_data;
tracing_generic_entry_update(entry, *irq_flags, pc);