summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile5
-rw-r--r--kernel/exit.c16
-rw-r--r--kernel/fork.c9
-rw-r--r--kernel/freezer.c9
-rw-r--r--kernel/hrtimer.c2
-rw-r--r--kernel/irq/manage.c2
-rw-r--r--kernel/irq_work.c18
-rw-r--r--kernel/kprobes.c8
-rw-r--r--kernel/kthread.c2
-rw-r--r--kernel/latencytop.c23
-rw-r--r--kernel/perf_event.c82
-rw-r--r--kernel/power/Makefile5
-rw-r--r--kernel/power/hibernate.c7
-rw-r--r--kernel/power/process.c8
-rw-r--r--kernel/power/suspend.c2
-rw-r--r--kernel/printk.c44
-rw-r--r--kernel/rcutree.c4
-rw-r--r--kernel/sched.c45
-rw-r--r--kernel/sched_autogroup.c8
-rw-r--r--kernel/smp.c19
-rw-r--r--kernel/softirq.c63
-rw-r--r--kernel/sys.c6
-rw-r--r--kernel/sysctl.c31
-rw-r--r--kernel/taskstats.c7
-rw-r--r--kernel/time/ntp.c425
-rw-r--r--kernel/time/tick-common.c2
-rw-r--r--kernel/time/tick-oneshot.c4
-rw-r--r--kernel/time/timekeeping.c43
-rw-r--r--kernel/trace/Makefile2
-rw-r--r--kernel/trace/trace.c6
-rw-r--r--kernel/trace/trace_selftest.c2
-rw-r--r--kernel/user_namespace.c15
-rw-r--r--kernel/watchdog.c36
-rw-r--r--kernel/workqueue.c60
34 files changed, 767 insertions, 253 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 0b5ff083fa22..353d3fe8ba33 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -43,7 +43,7 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
-obj-$(CONFIG_USE_GENERIC_SMP_HELPERS) += smp.o
+obj-$(CONFIG_SMP) += smp.o
ifneq ($(CONFIG_SMP),y)
obj-y += up.o
endif
@@ -100,6 +100,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace/
obj-$(CONFIG_TRACING) += trace/
obj-$(CONFIG_X86_DS) += trace/
obj-$(CONFIG_RING_BUFFER) += trace/
+obj-$(CONFIG_TRACEPOINTS) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o
@@ -121,7 +122,7 @@ $(obj)/configs.o: $(obj)/config_data.h
# config_data.h contains the same information as ikconfig.h but gzipped.
# Info from config_data can be extracted from /proc/config*
targets += config_data.gz
-$(obj)/config_data.gz: .config FORCE
+$(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE
$(call if_changed,gzip)
quiet_cmd_ikconfiggz = IKCFG $@
diff --git a/kernel/exit.c b/kernel/exit.c
index 676149a4ac5f..f9a45ebcc7b1 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -69,7 +69,7 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
list_del_rcu(&p->tasks);
list_del_init(&p->sibling);
- __get_cpu_var(process_counts)--;
+ __this_cpu_dec(process_counts);
}
list_del_rcu(&p->thread_group);
}
@@ -994,6 +994,15 @@ NORET_TYPE void do_exit(long code)
exit_fs(tsk);
check_stack_usage();
exit_thread();
+
+ /*
+ * Flush inherited counters to the parent - before the parent
+ * gets woken up by child-exit notifications.
+ *
+ * because of cgroup mode, must be called before cgroup_exit()
+ */
+ perf_event_exit_task(tsk);
+
cgroup_exit(tsk, 1);
if (group_dead)
@@ -1007,11 +1016,6 @@ NORET_TYPE void do_exit(long code)
* FIXME: do that only when needed, using sched_exit tracepoint
*/
flush_ptrace_hw_breakpoint(tsk);
- /*
- * Flush inherited counters to the parent - before the parent
- * gets woken up by child-exit notifications.
- */
- perf_event_exit_task(tsk);
exit_notify(tsk, group_dead);
#ifdef CONFIG_NUMA
diff --git a/kernel/fork.c b/kernel/fork.c
index 7d164e25b0f0..d9b44f20b6b0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -169,15 +169,14 @@ EXPORT_SYMBOL(free_task);
static inline void free_signal_struct(struct signal_struct *sig)
{
taskstats_tgid_free(sig);
+ sched_autogroup_exit(sig);
kmem_cache_free(signal_cachep, sig);
}
static inline void put_signal_struct(struct signal_struct *sig)
{
- if (atomic_dec_and_test(&sig->sigcnt)) {
- sched_autogroup_exit(sig);
+ if (atomic_dec_and_test(&sig->sigcnt))
free_signal_struct(sig);
- }
}
void __put_task_struct(struct task_struct *tsk)
@@ -1286,7 +1285,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
attach_pid(p, PIDTYPE_SID, task_session(current));
list_add_tail(&p->sibling, &p->real_parent->children);
list_add_tail_rcu(&p->tasks, &init_task.tasks);
- __get_cpu_var(process_counts)++;
+ __this_cpu_inc(process_counts);
}
attach_pid(p, PIDTYPE_PID, pid);
nr_threads++;
@@ -1318,7 +1317,7 @@ bad_fork_cleanup_mm:
}
bad_fork_cleanup_signal:
if (!(clone_flags & CLONE_THREAD))
- put_signal_struct(p->signal);
+ free_signal_struct(p->signal);
bad_fork_cleanup_sighand:
__cleanup_sighand(p->sighand);
bad_fork_cleanup_fs:
diff --git a/kernel/freezer.c b/kernel/freezer.c
index bd1d42b17cb2..66ecd2ead215 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -104,8 +104,13 @@ bool freeze_task(struct task_struct *p, bool sig_only)
}
if (should_send_signal(p)) {
- if (!signal_pending(p))
- fake_signal_wake_up(p);
+ fake_signal_wake_up(p);
+ /*
+ * fake_signal_wake_up() goes through p's scheduler
+ * lock and guarantees that TASK_STOPPED/TRACED ->
+ * TASK_RUNNING transition can't race with task state
+ * testing in try_to_freeze_tasks().
+ */
} else if (sig_only) {
return false;
} else {
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index f2429fc3438c..45da2b6920ab 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -497,7 +497,7 @@ static inline int hrtimer_is_hres_enabled(void)
*/
static inline int hrtimer_hres_active(void)
{
- return __get_cpu_var(hrtimer_bases).hres_active;
+ return __this_cpu_read(hrtimer_bases.hres_active);
}
/*
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 91a5fa25054e..0caa59f747dd 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -577,7 +577,7 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { }
*/
static int irq_thread(void *data)
{
- static struct sched_param param = {
+ static const struct sched_param param = {
.sched_priority = MAX_USER_RT_PRIO/2,
};
struct irqaction *action = data;
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index 90f881904bb1..c58fa7da8aef 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -77,21 +77,21 @@ void __weak arch_irq_work_raise(void)
*/
static void __irq_work_queue(struct irq_work *entry)
{
- struct irq_work **head, *next;
+ struct irq_work *next;
- head = &get_cpu_var(irq_work_list);
+ preempt_disable();
do {
- next = *head;
+ next = __this_cpu_read(irq_work_list);
/* Can assign non-atomic because we keep the flags set. */
entry->next = next_flags(next, IRQ_WORK_FLAGS);
- } while (cmpxchg(head, next, entry) != next);
+ } while (this_cpu_cmpxchg(irq_work_list, next, entry) != next);
/* The list was empty, raise self-interrupt to start processing. */
if (!irq_work_next(entry))
arch_irq_work_raise();
- put_cpu_var(irq_work_list);
+ preempt_enable();
}
/*
@@ -120,16 +120,16 @@ EXPORT_SYMBOL_GPL(irq_work_queue);
*/
void irq_work_run(void)
{
- struct irq_work *list, **head;
+ struct irq_work *list;
- head = &__get_cpu_var(irq_work_list);
- if (*head == NULL)
+ if (this_cpu_read(irq_work_list) == NULL)
return;
BUG_ON(!in_irq());
BUG_ON(!irqs_disabled());
- list = xchg(head, NULL);
+ list = this_cpu_xchg(irq_work_list, NULL);
+
while (list != NULL) {
struct irq_work *entry = list;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 7663e5df0e6f..77981813a1e7 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -317,12 +317,12 @@ void __kprobes free_optinsn_slot(kprobe_opcode_t * slot, int dirty)
/* We have preemption disabled.. so it is safe to use __ versions */
static inline void set_kprobe_instance(struct kprobe *kp)
{
- __get_cpu_var(kprobe_instance) = kp;
+ __this_cpu_write(kprobe_instance, kp);
}
static inline void reset_kprobe_instance(void)
{
- __get_cpu_var(kprobe_instance) = NULL;
+ __this_cpu_write(kprobe_instance, NULL);
}
/*
@@ -965,7 +965,7 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
int trapnr)
{
- struct kprobe *cur = __get_cpu_var(kprobe_instance);
+ struct kprobe *cur = __this_cpu_read(kprobe_instance);
/*
* if we faulted "during" the execution of a user specified
@@ -980,7 +980,7 @@ static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
{
- struct kprobe *cur = __get_cpu_var(kprobe_instance);
+ struct kprobe *cur = __this_cpu_read(kprobe_instance);
int ret = 0;
if (cur && cur->break_handler) {
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 5355cfd44a3f..c55afba990a3 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -148,7 +148,7 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
wait_for_completion(&create.done);
if (!IS_ERR(create.result)) {
- static struct sched_param param = { .sched_priority = 0 };
+ static const struct sched_param param = { .sched_priority = 0 };
va_list args;
va_start(args, namefmt);
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index 17110a4a4fc2..ee74b35e528d 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -241,24 +241,19 @@ static int lstats_show(struct seq_file *m, void *v)
seq_puts(m, "Latency Top version : v0.1\n");
for (i = 0; i < MAXLR; i++) {
- if (latency_record[i].backtrace[0]) {
+ struct latency_record *lr = &latency_record[i];
+
+ if (lr->backtrace[0]) {
int q;
- seq_printf(m, "%i %lu %lu ",
- latency_record[i].count,
- latency_record[i].time,
- latency_record[i].max);
+ seq_printf(m, "%i %lu %lu",
+ lr->count, lr->time, lr->max);
for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
- char sym[KSYM_SYMBOL_LEN];
- char *c;
- if (!latency_record[i].backtrace[q])
+ unsigned long bt = lr->backtrace[q];
+ if (!bt)
break;
- if (latency_record[i].backtrace[q] == ULONG_MAX)
+ if (bt == ULONG_MAX)
break;
- sprint_symbol(sym, latency_record[i].backtrace[q]);
- c = strchr(sym, '+');
- if (c)
- *c = 0;
- seq_printf(m, "%s ", sym);
+ seq_printf(m, " %ps", (void *)bt);
}
seq_printf(m, "\n");
}
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 11847bf1e8cc..b782b7a79f00 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -38,6 +38,12 @@
#include <asm/irq_regs.h>
+enum event_type_t {
+ EVENT_FLEXIBLE = 0x1,
+ EVENT_PINNED = 0x2,
+ EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
+};
+
atomic_t perf_task_events __read_mostly;
static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
@@ -65,6 +71,12 @@ int sysctl_perf_event_sample_rate __read_mostly = 100000;
static atomic64_t perf_event_id;
+static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
+ enum event_type_t event_type);
+
+static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
+ enum event_type_t event_type);
+
void __weak perf_event_print_debug(void) { }
extern __weak const char *perf_pmu_name(void)
@@ -72,6 +84,11 @@ extern __weak const char *perf_pmu_name(void)
return "pmu";
}
+static inline u64 perf_clock(void)
+{
+ return local_clock();
+}
+
void perf_pmu_disable(struct pmu *pmu)
{
int *count = this_cpu_ptr(pmu->pmu_disable_count);
@@ -240,11 +257,6 @@ static void perf_unpin_context(struct perf_event_context *ctx)
put_ctx(ctx);
}
-static inline u64 perf_clock(void)
-{
- return local_clock();
-}
-
/*
* Update the record of the current time in a context.
*/
@@ -256,6 +268,12 @@ static void update_context_time(struct perf_event_context *ctx)
ctx->timestamp = now;
}
+static u64 perf_event_time(struct perf_event *event)
+{
+ struct perf_event_context *ctx = event->ctx;
+ return ctx ? ctx->time : 0;
+}
+
/*
* Update the total_time_enabled and total_time_running fields for a event.
*/
@@ -269,7 +287,7 @@ static void update_event_times(struct perf_event *event)
return;
if (ctx->is_active)
- run_end = ctx->time;
+ run_end = perf_event_time(event);
else
run_end = event->tstamp_stopped;
@@ -278,7 +296,7 @@ static void update_event_times(struct perf_event *event)
if (event->state == PERF_EVENT_STATE_INACTIVE)
run_end = event->tstamp_stopped;
else
- run_end = ctx->time;
+ run_end = perf_event_time(event);
event->total_time_running = run_end - event->tstamp_running;
}
@@ -534,6 +552,7 @@ event_sched_out(struct perf_event *event,
struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx)
{
+ u64 tstamp = perf_event_time(event);
u64 delta;
/*
* An event which could not be activated because of
@@ -545,7 +564,7 @@ event_sched_out(struct perf_event *event,
&& !event_filter_match(event)) {
delta = ctx->time - event->tstamp_stopped;
event->tstamp_running += delta;
- event->tstamp_stopped = ctx->time;
+ event->tstamp_stopped = tstamp;
}
if (event->state != PERF_EVENT_STATE_ACTIVE)
@@ -556,7 +575,7 @@ event_sched_out(struct perf_event *event,
event->pending_disable = 0;
event->state = PERF_EVENT_STATE_OFF;
}
- event->tstamp_stopped = ctx->time;
+ event->tstamp_stopped = tstamp;
event->pmu->del(event, 0);
event->oncpu = -1;
@@ -768,6 +787,8 @@ event_sched_in(struct perf_event *event,
struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx)
{
+ u64 tstamp = perf_event_time(event);
+
if (event->state <= PERF_EVENT_STATE_OFF)
return 0;
@@ -784,9 +805,9 @@ event_sched_in(struct perf_event *event,
return -EAGAIN;
}
- event->tstamp_running += ctx->time - event->tstamp_stopped;
+ event->tstamp_running += tstamp - event->tstamp_stopped;
- event->shadow_ctx_time = ctx->time - ctx->timestamp;
+ event->shadow_ctx_time = tstamp - ctx->timestamp;
if (!is_software_event(event))
cpuctx->active_oncpu++;
@@ -898,11 +919,13 @@ static int group_can_go_on(struct perf_event *event,
static void add_event_to_ctx(struct perf_event *event,
struct perf_event_context *ctx)
{
+ u64 tstamp = perf_event_time(event);
+
list_add_event(event, ctx);
perf_group_attach(event);
- event->tstamp_enabled = ctx->time;
- event->tstamp_running = ctx->time;
- event->tstamp_stopped = ctx->time;
+ event->tstamp_enabled = tstamp;
+ event->tstamp_running = tstamp;
+ event->tstamp_stopped = tstamp;
}
/*
@@ -937,7 +960,7 @@ static void __perf_install_in_context(void *info)
add_event_to_ctx(event, ctx);
- if (event->cpu != -1 && event->cpu != smp_processor_id())
+ if (!event_filter_match(event))
goto unlock;
/*
@@ -1042,14 +1065,13 @@ static void __perf_event_mark_enabled(struct perf_event *event,
struct perf_event_context *ctx)
{
struct perf_event *sub;
+ u64 tstamp = perf_event_time(event);
event->state = PERF_EVENT_STATE_INACTIVE;
- event->tstamp_enabled = ctx->time - event->total_time_enabled;
+ event->tstamp_enabled = tstamp - event->total_time_enabled;
list_for_each_entry(sub, &event->sibling_list, group_entry) {
- if (sub->state >= PERF_EVENT_STATE_INACTIVE) {
- sub->tstamp_enabled =
- ctx->time - sub->total_time_enabled;
- }
+ if (sub->state >= PERF_EVENT_STATE_INACTIVE)
+ sub->tstamp_enabled = tstamp - sub->total_time_enabled;
}
}
@@ -1082,7 +1104,7 @@ static void __perf_event_enable(void *info)
goto unlock;
__perf_event_mark_enabled(event, ctx);
- if (event->cpu != -1 && event->cpu != smp_processor_id())
+ if (!event_filter_match(event))
goto unlock;
/*
@@ -1193,12 +1215,6 @@ static int perf_event_refresh(struct perf_event *event, int refresh)
return 0;
}
-enum event_type_t {
- EVENT_FLEXIBLE = 0x1,
- EVENT_PINNED = 0x2,
- EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
-};
-
static void ctx_sched_out(struct perf_event_context *ctx,
struct perf_cpu_context *cpuctx,
enum event_type_t event_type)
@@ -1435,7 +1451,7 @@ ctx_pinned_sched_in(struct perf_event_context *ctx,
list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
if (event->state <= PERF_EVENT_STATE_OFF)
continue;
- if (event->cpu != -1 && event->cpu != smp_processor_id())
+ if (!event_filter_match(event))
continue;
if (group_can_go_on(event, cpuctx, 1))
@@ -1467,7 +1483,7 @@ ctx_flexible_sched_in(struct perf_event_context *ctx,
* Listen to the 'cpu' scheduling filter constraint
* of events:
*/
- if (event->cpu != -1 && event->cpu != smp_processor_id())
+ if (!event_filter_match(event))
continue;
if (group_can_go_on(event, cpuctx, can_add_hw)) {
@@ -1694,7 +1710,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
if (event->state != PERF_EVENT_STATE_ACTIVE)
continue;
- if (event->cpu != -1 && event->cpu != smp_processor_id())
+ if (!event_filter_match(event))
continue;
hwc = &event->hw;
@@ -3893,7 +3909,7 @@ static int perf_event_task_match(struct perf_event *event)
if (event->state < PERF_EVENT_STATE_INACTIVE)
return 0;
- if (event->cpu != -1 && event->cpu != smp_processor_id())
+ if (!event_filter_match(event))
return 0;
if (event->attr.comm || event->attr.mmap ||
@@ -4030,7 +4046,7 @@ static int perf_event_comm_match(struct perf_event *event)
if (event->state < PERF_EVENT_STATE_INACTIVE)
return 0;
- if (event->cpu != -1 && event->cpu != smp_processor_id())
+ if (!event_filter_match(event))
return 0;
if (event->attr.comm)
@@ -4178,7 +4194,7 @@ static int perf_event_mmap_match(struct perf_event *event,
if (event->state < PERF_EVENT_STATE_INACTIVE)
return 0;
- if (event->cpu != -1 && event->cpu != smp_processor_id())
+ if (!event_filter_match(event))
return 0;
if ((!executable && event->attr.mmap_data) ||
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index f9063c6b185d..b75597235d85 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -1,7 +1,4 @@
-
-ifeq ($(CONFIG_PM_DEBUG),y)
-EXTRA_CFLAGS += -DDEBUG
-endif
+ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG
obj-$(CONFIG_PM) += main.o
obj-$(CONFIG_PM_SLEEP) += console.o
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 048d0b514831..870f72bc72ae 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -62,7 +62,7 @@ void hibernation_set_ops(struct platform_hibernation_ops *ops)
{
if (ops && !(ops->begin && ops->end && ops->pre_snapshot
&& ops->prepare && ops->finish && ops->enter && ops->pre_restore
- && ops->restore_cleanup)) {
+ && ops->restore_cleanup && ops->leave)) {
WARN_ON(1);
return;
}
@@ -278,7 +278,7 @@ static int create_image(int platform_mode)
goto Enable_irqs;
}
- if (hibernation_test(TEST_CORE) || !pm_check_wakeup_events())
+ if (hibernation_test(TEST_CORE) || pm_wakeup_pending())
goto Power_up;
in_suspend = 1;
@@ -516,7 +516,7 @@ int hibernation_platform_enter(void)
local_irq_disable();
sysdev_suspend(PMSG_HIBERNATE);
- if (!pm_check_wakeup_events()) {
+ if (pm_wakeup_pending()) {
error = -EAGAIN;
goto Power_up;
}
@@ -647,6 +647,7 @@ int hibernate(void)
swsusp_free();
if (!error)
power_down();
+ in_suspend = 0;
pm_restore_gfp_mask();
} else {
pr_debug("PM: Image restored successfully.\n");
diff --git a/kernel/power/process.c b/kernel/power/process.c
index e50b4c1b2a0f..d6d2a10320e0 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -64,6 +64,12 @@ static int try_to_freeze_tasks(bool sig_only)
* perturb a task in TASK_STOPPED or TASK_TRACED.
* It is "frozen enough". If the task does wake
* up, it will immediately call try_to_freeze.
+ *
+ * Because freeze_task() goes through p's
+ * scheduler lock after setting TIF_FREEZE, it's
+ * guaranteed that either we see TASK_RUNNING or
+ * try_to_stop() after schedule() in ptrace/signal
+ * stop sees TIF_FREEZE.
*/
if (!task_is_stopped_or_traced(p) &&
!freezer_should_skip(p))
@@ -79,7 +85,7 @@ static int try_to_freeze_tasks(bool sig_only)
if (!todo || time_after(jiffies, end_time))
break;
- if (!pm_check_wakeup_events()) {
+ if (pm_wakeup_pending()) {
wakeup = true;
break;
}
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 031d5e3a6197..8850df68794d 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -164,7 +164,7 @@ static int suspend_enter(suspend_state_t state)
error = sysdev_suspend(PMSG_SUSPEND);
if (!error) {
- if (!suspend_test(TEST_CORE) && pm_check_wakeup_events()) {
+ if (!(suspend_test(TEST_CORE) || pm_wakeup_pending())) {
error = suspend_ops->enter(state);
events_check_enabled = false;
}
diff --git a/kernel/printk.c b/kernel/printk.c
index 4642a5c439eb..53d9a9ec88e6 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -39,6 +39,7 @@
#include <linux/syslog.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
+#include <linux/rculist.h>
#include <asm/uaccess.h>
@@ -273,12 +274,12 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
* at open time.
*/
if (type == SYSLOG_ACTION_OPEN || !from_file) {
- if (dmesg_restrict && !capable(CAP_SYS_ADMIN))
- return -EPERM;
+ if (dmesg_restrict && !capable(CAP_SYSLOG))
+ goto warn; /* switch to return -EPERM after 2.6.39 */
if ((type != SYSLOG_ACTION_READ_ALL &&
type != SYSLOG_ACTION_SIZE_BUFFER) &&
- !capable(CAP_SYS_ADMIN))
- return -EPERM;
+ !capable(CAP_SYSLOG))
+ goto warn; /* switch to return -EPERM after 2.6.39 */
}
error = security_syslog(type);
@@ -422,6 +423,12 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
}
out:
return error;
+warn:
+ /* remove after 2.6.39 */
+ if (capable(CAP_SYS_ADMIN))
+ WARN_ONCE(1, "Attempt to access syslog with CAP_SYS_ADMIN "
+ "but no CAP_SYSLOG (deprecated and denied).\n");
+ return -EPERM;
}
SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
@@ -1496,7 +1503,7 @@ int kmsg_dump_register(struct kmsg_dumper *dumper)
/* Don't allow registering multiple times */
if (!dumper->registered) {
dumper->registered = 1;
- list_add_tail(&dumper->list, &dump_list);
+ list_add_tail_rcu(&dumper->list, &dump_list);
err = 0;
}
spin_unlock_irqrestore(&dump_list_lock, flags);
@@ -1520,29 +1527,16 @@ int kmsg_dump_unregister(struct kmsg_dumper *dumper)
spin_lock_irqsave(&dump_list_lock, flags);
if (dumper->registered) {
dumper->registered = 0;
- list_del(&dumper->list);
+ list_del_rcu(&dumper->list);
err = 0;
}
spin_unlock_irqrestore(&dump_list_lock, flags);
+ synchronize_rcu();
return err;
}
EXPORT_SYMBOL_GPL(kmsg_dump_unregister);
-static const char * const kmsg_reasons[] = {
- [KMSG_DUMP_OOPS] = "oops",
- [KMSG_DUMP_PANIC] = "panic",
- [KMSG_DUMP_KEXEC] = "kexec",
-};
-
-static const char *kmsg_to_str(enum kmsg_dump_reason reason)
-{
- if (reason >= ARRAY_SIZE(kmsg_reasons) || reason < 0)
- return "unknown";
-
- return kmsg_reasons[reason];
-}
-
/**
* kmsg_dump - dump kernel log to kernel message dumpers.
* @reason: the reason (oops, panic etc) for dumping
@@ -1581,13 +1575,9 @@ void kmsg_dump(enum kmsg_dump_reason reason)
l2 = chars;
}
- if (!spin_trylock_irqsave(&dump_list_lock, flags)) {
- printk(KERN_ERR "dump_kmsg: dump list lock is held during %s, skipping dump\n",
- kmsg_to_str(reason));
- return;
- }
- list_for_each_entry(dumper, &dump_list, list)
+ rcu_read_lock();
+ list_for_each_entry_rcu(dumper, &dump_list, list)
dumper->dump(dumper, reason, s1, l1, s2, l2);
- spin_unlock_irqrestore(&dump_list_lock, flags);
+ rcu_read_unlock();
}
#endif
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index d0ddfea6579d..dd4aea806f8e 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -364,8 +364,8 @@ void rcu_irq_exit(void)
WARN_ON_ONCE(rdtp->dynticks & 0x1);
/* If the interrupt queued a callback, get out of dyntick mode. */
- if (__get_cpu_var(rcu_sched_data).nxtlist ||
- __get_cpu_var(rcu_bh_data).nxtlist)
+ if (__this_cpu_read(rcu_sched_data.nxtlist) ||
+ __this_cpu_read(rcu_bh_data.nxtlist))
set_need_resched();
}
diff --git a/kernel/sched.c b/kernel/sched.c
index 04949089e760..a0eb0941fa84 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -278,14 +278,12 @@ struct task_group {
#endif
};
-#define root_task_group init_task_group
-
/* task_group_lock serializes the addition/removal of task groups */
static DEFINE_SPINLOCK(task_group_lock);
#ifdef CONFIG_FAIR_GROUP_SCHED
-# define INIT_TASK_GROUP_LOAD NICE_0_LOAD
+# define ROOT_TASK_GROUP_LOAD NICE_0_LOAD
/*
* A weight of 0 or 1 can cause arithmetics problems.
@@ -298,13 +296,13 @@ static DEFINE_SPINLOCK(task_group_lock);
#define MIN_SHARES 2
#define MAX_SHARES (1UL << 18)
-static int init_task_group_load = INIT_TASK_GROUP_LOAD;
+static int root_task_group_load = ROOT_TASK_GROUP_LOAD;
#endif
/* Default task group.
* Every task in system belong to this group at bootup.
*/
-struct task_group init_task_group;
+struct task_group root_task_group;
#endif /* CONFIG_CGROUP_SCHED */
@@ -743,7 +741,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
buf[cnt] = 0;
cmp = strstrip(buf);
- if (strncmp(buf, "NO_", 3) == 0) {
+ if (strncmp(cmp, "NO_", 3) == 0) {
neg = 1;
cmp += 3;
}
@@ -7848,7 +7846,7 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
cfs_rq->tg = tg;
tg->se[cpu] = se;
- /* se could be NULL for init_task_group */
+ /* se could be NULL for root_task_group */
if (!se)
return;
@@ -7908,18 +7906,18 @@ void __init sched_init(void)
ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT);
#ifdef CONFIG_FAIR_GROUP_SCHED
- init_task_group.se = (struct sched_entity **)ptr;
+ root_task_group.se = (struct sched_entity **)ptr;
ptr += nr_cpu_ids * sizeof(void **);
- init_task_group.cfs_rq = (struct cfs_rq **)ptr;
+ root_task_group.cfs_rq = (struct cfs_rq **)ptr;
ptr += nr_cpu_ids * sizeof(void **);
#endif /* CONFIG_FAIR_GROUP_SCHED */
#ifdef CONFIG_RT_GROUP_SCHED
- init_task_group.rt_se = (struct sched_rt_entity **)ptr;
+ root_task_group.rt_se = (struct sched_rt_entity **)ptr;
ptr += nr_cpu_ids * sizeof(void **);
- init_task_group.rt_rq = (struct rt_rq **)ptr;
+ root_task_group.rt_rq = (struct rt_rq **)ptr;
ptr += nr_cpu_ids * sizeof(void **);
#endif /* CONFIG_RT_GROUP_SCHED */
@@ -7939,13 +7937,13 @@ void __init sched_init(void)
global_rt_period(), global_rt_runtime());
#ifdef CONFIG_RT_GROUP_SCHED
- init_rt_bandwidth(&init_task_group.rt_bandwidth,
+ init_rt_bandwidth(&root_task_group.rt_bandwidth,
global_rt_period(), global_rt_runtime());
#endif /* CONFIG_RT_GROUP_SCHED */
#ifdef CONFIG_CGROUP_SCHED
- list_add(&init_task_group.list, &task_groups);
- INIT_LIST_HEAD(&init_task_group.children);
+ list_add(&root_task_group.list, &task_groups);
+ INIT_LIST_HEAD(&root_task_group.children);
autogroup_init(&init_task);
#endif /* CONFIG_CGROUP_SCHED */
@@ -7960,34 +7958,34 @@ void __init sched_init(void)
init_cfs_rq(&rq->cfs, rq);
init_rt_rq(&rq->rt, rq);
#ifdef CONFIG_FAIR_GROUP_SCHED
- init_task_group.shares = init_task_group_load;
+ root_task_group.shares = root_task_group_load;
INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
/*
- * How much cpu bandwidth does init_task_group get?
+ * How much cpu bandwidth does root_task_group get?
*
* In case of task-groups formed thr' the cgroup filesystem, it
* gets 100% of the cpu resources in the system. This overall
* system cpu resource is divided among the tasks of
- * init_task_group and its child task-groups in a fair manner,
+ * root_task_group and its child task-groups in a fair manner,
* based on each entity's (task or task-group's) weight
* (se->load.weight).
*
- * In other words, if init_task_group has 10 tasks of weight
+ * In other words, if root_task_group has 10 tasks of weight
* 1024) and two child groups A0 and A1 (of weight 1024 each),
* then A0's share of the cpu resource is:
*
* A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33%
*
- * We achieve this by letting init_task_group's tasks sit
- * directly in rq->cfs (i.e init_task_group->se[] = NULL).
+ * We achieve this by letting root_task_group's tasks sit
+ * directly in rq->cfs (i.e root_task_group->se[] = NULL).
*/
- init_tg_cfs_entry(&init_task_group, &rq->cfs, NULL, i, NULL);
+ init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL);
#endif /* CONFIG_FAIR_GROUP_SCHED */
rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
#ifdef CONFIG_RT_GROUP_SCHED
INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
- init_tg_rt_entry(&init_task_group, &rq->rt, NULL, i, NULL);
+ init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL);
#endif
for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
@@ -8379,6 +8377,7 @@ static void free_sched_group(struct task_group *tg)
{
free_fair_sched_group(tg);
free_rt_sched_group(tg);
+ autogroup_free(tg);
kfree(tg);
}
@@ -8812,7 +8811,7 @@ cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp)
if (!cgrp->parent) {
/* This is early initialization for the top cgroup */
- return &init_task_group.css;
+ return &root_task_group.css;
}
parent = cgroup_tg(cgrp->parent);
diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c
index c80fedcd476b..32a723b8f84c 100644
--- a/kernel/sched_autogroup.c
+++ b/kernel/sched_autogroup.c
@@ -9,10 +9,10 @@ unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
static struct autogroup autogroup_default;
static atomic_t autogroup_seq_nr;
-static void autogroup_init(struct task_struct *init_task)
+static void __init autogroup_init(struct task_struct *init_task)
{
- autogroup_default.tg = &init_task_group;
- init_task_group.autogroup = &autogroup_default;
+ autogroup_default.tg = &root_task_group;
+ root_task_group.autogroup = &autogroup_default;
kref_init(&autogroup_default.kref);
init_rwsem(&autogroup_default.lock);
init_task->signal->autogroup = &autogroup_default;
@@ -63,7 +63,7 @@ static inline struct autogroup *autogroup_create(void)
if (!ag)
goto out_fail;
- tg = sched_create_group(&init_task_group);
+ tg = sched_create_group(&root_task_group);
if (IS_ERR(tg))
goto out_free;
diff --git a/kernel/smp.c b/kernel/smp.c
index 12ed8b013e2d..4ec30e069987 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -13,6 +13,7 @@
#include <linux/smp.h>
#include <linux/cpu.h>
+#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
static struct {
struct list_head queue;
raw_spinlock_t lock;
@@ -529,3 +530,21 @@ void ipi_call_unlock_irq(void)
{
raw_spin_unlock_irq(&call_function.lock);
}
+#endif /* USE_GENERIC_SMP_HELPERS */
+
+/*
+ * Call a function on all processors
+ */
+int on_each_cpu(void (*func) (void *info), void *info, int wait)
+{
+ int ret = 0;
+
+ preempt_disable();
+ ret = smp_call_function(func, info, wait);
+ local_irq_disable();
+ func(info);
+ local_irq_enable();
+ preempt_enable();
+ return ret;
+}
+EXPORT_SYMBOL(on_each_cpu);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index d4d918a91881..68eb5efec388 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -70,7 +70,7 @@ char *softirq_to_name[NR_SOFTIRQS] = {
static void wakeup_softirqd(void)
{
/* Interrupts are disabled: no need to stop preemption */
- struct task_struct *tsk = __get_cpu_var(ksoftirqd);
+ struct task_struct *tsk = __this_cpu_read(ksoftirqd);
if (tsk && tsk->state != TASK_RUNNING)
wake_up_process(tsk);
@@ -388,8 +388,8 @@ void __tasklet_schedule(struct tasklet_struct *t)
local_irq_save(flags);
t->next = NULL;
- *__get_cpu_var(tasklet_vec).tail = t;
- __get_cpu_var(tasklet_vec).tail = &(t->next);
+ *__this_cpu_read(tasklet_vec.tail) = t;
+ __this_cpu_write(tasklet_vec.tail, &(t->next));
raise_softirq_irqoff(TASKLET_SOFTIRQ);
local_irq_restore(flags);
}
@@ -402,8 +402,8 @@ void __tasklet_hi_schedule(struct tasklet_struct *t)
local_irq_save(flags);
t->next = NULL;
- *__get_cpu_var(tasklet_hi_vec).tail = t;
- __get_cpu_var(tasklet_hi_vec).tail = &(t->next);
+ *__this_cpu_read(tasklet_hi_vec.tail) = t;
+ __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
raise_softirq_irqoff(HI_SOFTIRQ);
local_irq_restore(flags);
}
@@ -414,8 +414,8 @@ void __tasklet_hi_schedule_first(struct tasklet_struct *t)
{
BUG_ON(!irqs_disabled());
- t->next = __get_cpu_var(tasklet_hi_vec).head;
- __get_cpu_var(tasklet_hi_vec).head = t;
+ t->next = __this_cpu_read(tasklet_hi_vec.head);
+ __this_cpu_write(tasklet_hi_vec.head, t);
__raise_softirq_irqoff(HI_SOFTIRQ);
}
@@ -426,9 +426,9 @@ static void tasklet_action(struct softirq_action *a)
struct tasklet_struct *list;
local_irq_disable();
- list = __get_cpu_var(tasklet_vec).head;
- __get_cpu_var(tasklet_vec).head = NULL;
- __get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head;
+ list = __this_cpu_read(tasklet_vec.head);
+ __this_cpu_write(tasklet_vec.head, NULL);
+ __this_cpu_write(tasklet_vec.tail, &__get_cpu_var(tasklet_vec).head);
local_irq_enable();
while (list) {
@@ -449,8 +449,8 @@ static void tasklet_action(struct softirq_action *a)
local_irq_disable();
t->next = NULL;
- *__get_cpu_var(tasklet_vec).tail = t;
- __get_cpu_var(tasklet_vec).tail = &(t->next);
+ *__this_cpu_read(tasklet_vec.tail) = t;
+ __this_cpu_write(tasklet_vec.tail, &(t->next));
__raise_softirq_irqoff(TASKLET_SOFTIRQ);
local_irq_enable();
}
@@ -461,9 +461,9 @@ static void tasklet_hi_action(struct softirq_action *a)
struct tasklet_struct *list;
local_irq_disable();
- list = __get_cpu_var(tasklet_hi_vec).head;
- __get_cpu_var(tasklet_hi_vec).head = NULL;
- __get_cpu_var(tasklet_hi_vec).tail = &__get_cpu_var(tasklet_hi_vec).head;
+ list = __this_cpu_read(tasklet_hi_vec.head);
+ __this_cpu_write(tasklet_hi_vec.head, NULL);
+ __this_cpu_write(tasklet_hi_vec.tail, &__get_cpu_var(tasklet_hi_vec).head);
local_irq_enable();
while (list) {
@@ -484,8 +484,8 @@ static void tasklet_hi_action(struct softirq_action *a)
local_irq_disable();
t->next = NULL;
- *__get_cpu_var(tasklet_hi_vec).tail = t;
- __get_cpu_var(tasklet_hi_vec).tail = &(t->next);
+ *__this_cpu_read(tasklet_hi_vec.tail) = t;
+ __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
__raise_softirq_irqoff(HI_SOFTIRQ);
local_irq_enable();
}
@@ -802,16 +802,16 @@ static void takeover_tasklets(unsigned int cpu)
/* Find end, append list for that CPU. */
if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
- *(__get_cpu_var(tasklet_vec).tail) = per_cpu(tasklet_vec, cpu).head;
- __get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).tail;
+ *__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
+ this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
per_cpu(tasklet_vec, cpu).head = NULL;
per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
}
raise_softirq_irqoff(TASKLET_SOFTIRQ);
if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
- *__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).head;
- __get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).tail;
+ *__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
+ __this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
per_cpu(tasklet_hi_vec, cpu).head = NULL;
per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
}
@@ -853,7 +853,7 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
cpumask_any(cpu_online_mask));
case CPU_DEAD:
case CPU_DEAD_FROZEN: {
- static struct sched_param param = {
+ static const struct sched_param param = {
.sched_priority = MAX_RT_PRIO-1
};
@@ -885,25 +885,6 @@ static __init int spawn_ksoftirqd(void)
}
early_initcall(spawn_ksoftirqd);
-#ifdef CONFIG_SMP
-/*
- * Call a function on all processors
- */
-int on_each_cpu(void (*func) (void *info), void *info, int wait)
-{
- int ret = 0;
-
- preempt_disable();
- ret = smp_call_function(func, info, wait);
- local_irq_disable();
- func(info);
- local_irq_enable();
- preempt_enable();
- return ret;
-}
-EXPORT_SYMBOL(on_each_cpu);
-#endif
-
/*
* [ These __weak aliases are kept in a separate compilation unit, so that
* GCC does not inline them incorrectly. ]
diff --git a/kernel/sys.c b/kernel/sys.c
index 2745dcdb6c6c..31b71a276b40 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -43,6 +43,8 @@
#include <linux/kprobes.h>
#include <linux/user_namespace.h>
+#include <linux/kmsg_dump.h>
+
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/unistd.h>
@@ -285,6 +287,7 @@ out_unlock:
*/
void emergency_restart(void)
{
+ kmsg_dump(KMSG_DUMP_EMERG);
machine_emergency_restart();
}
EXPORT_SYMBOL_GPL(emergency_restart);
@@ -312,6 +315,7 @@ void kernel_restart(char *cmd)
printk(KERN_EMERG "Restarting system.\n");
else
printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd);
+ kmsg_dump(KMSG_DUMP_RESTART);
machine_restart(cmd);
}
EXPORT_SYMBOL_GPL(kernel_restart);
@@ -333,6 +337,7 @@ void kernel_halt(void)
kernel_shutdown_prepare(SYSTEM_HALT);
sysdev_shutdown();
printk(KERN_EMERG "System halted.\n");
+ kmsg_dump(KMSG_DUMP_HALT);
machine_halt();
}
@@ -351,6 +356,7 @@ void kernel_power_off(void)
disable_nonboot_cpus();
sysdev_shutdown();
printk(KERN_EMERG "Power down.\n");
+ kmsg_dump(KMSG_DUMP_POWEROFF);
machine_power_off();
}
EXPORT_SYMBOL_GPL(kernel_power_off);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ae5cbb1e3ced..bc86bb32e126 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -24,6 +24,7 @@
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/signal.h>
+#include <linux/printk.h>
#include <linux/proc_fs.h>
#include <linux/security.h>
#include <linux/ctype.h>
@@ -245,10 +246,6 @@ static struct ctl_table root_table[] = {
.mode = 0555,
.child = dev_table,
},
-/*
- * NOTE: do not add new entries to this table unless you have read
- * Documentation/sysctl/ctl_unnumbered.txt
- */
{ }
};
@@ -710,6 +707,15 @@ static struct ctl_table kern_table[] = {
.extra1 = &zero,
.extra2 = &one,
},
+ {
+ .procname = "kptr_restrict",
+ .data = &kptr_restrict,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &two,
+ },
#endif
{
.procname = "ngroups_max",
@@ -962,10 +968,6 @@ static struct ctl_table kern_table[] = {
.proc_handler = proc_dointvec,
},
#endif
-/*
- * NOTE: do not add new entries to this table unless you have read
- * Documentation/sysctl/ctl_unnumbered.txt
- */
{ }
};
@@ -1326,11 +1328,6 @@ static struct ctl_table vm_table[] = {
.extra2 = &one,
},
#endif
-
-/*
- * NOTE: do not add new entries to this table unless you have read
- * Documentation/sysctl/ctl_unnumbered.txt
- */
{ }
};
@@ -1486,10 +1483,6 @@ static struct ctl_table fs_table[] = {
.proc_handler = &pipe_proc_fn,
.extra1 = &pipe_min_size,
},
-/*
- * NOTE: do not add new entries to this table unless you have read
- * Documentation/sysctl/ctl_unnumbered.txt
- */
{ }
};
@@ -2899,7 +2892,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
}
}
-#else /* CONFIG_PROC_FS */
+#else /* CONFIG_PROC_SYSCTL */
int proc_dostring(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -2951,7 +2944,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
}
-#endif /* CONFIG_PROC_FS */
+#endif /* CONFIG_PROC_SYSCTL */
/*
* No sense putting this after each symbol definition, twice,
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 3308fd7f1b52..3971c6b9d58d 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -89,8 +89,7 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
return -ENOMEM;
if (!info) {
- int seq = get_cpu_var(taskstats_seqnum)++;
- put_cpu_var(taskstats_seqnum);
+ int seq = this_cpu_inc_return(taskstats_seqnum) - 1;
reply = genlmsg_put(skb, 0, seq, &family, 0, cmd);
} else
@@ -349,7 +348,7 @@ static int parse(struct nlattr *na, struct cpumask *mask)
return ret;
}
-#ifdef CONFIG_IA64
+#if defined(CONFIG_64BIT) && !defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
#define TASKSTATS_NEEDS_PADDING 1
#endif
@@ -612,7 +611,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead)
fill_tgid_exit(tsk);
}
- listeners = &__raw_get_cpu_var(listener_array);
+ listeners = __this_cpu_ptr(&listener_array);
if (list_empty(&listeners->list))
return;
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index d2321891538f..5c00242fa921 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -14,6 +14,7 @@
#include <linux/timex.h>
#include <linux/time.h>
#include <linux/mm.h>
+#include <linux/module.h>
/*
* NTP timekeeping variables:
@@ -74,6 +75,162 @@ static long time_adjust;
/* constant (boot-param configurable) NTP tick adjustment (upscaled) */
static s64 ntp_tick_adj;
+#ifdef CONFIG_NTP_PPS
+
+/*
+ * The following variables are used when a pulse-per-second (PPS) signal
+ * is available. They establish the engineering parameters of the clock
+ * discipline loop when controlled by the PPS signal.
+ */
+#define PPS_VALID 10 /* PPS signal watchdog max (s) */
+#define PPS_POPCORN 4 /* popcorn spike threshold (shift) */
+#define PPS_INTMIN 2 /* min freq interval (s) (shift) */
+#define PPS_INTMAX 8 /* max freq interval (s) (shift) */
+#define PPS_INTCOUNT 4 /* number of consecutive good intervals to
+ increase pps_shift or consecutive bad
+ intervals to decrease it */
+#define PPS_MAXWANDER 100000 /* max PPS freq wander (ns/s) */
+
+static int pps_valid; /* signal watchdog counter */
+static long pps_tf[3]; /* phase median filter */
+static long pps_jitter; /* current jitter (ns) */
+static struct timespec pps_fbase; /* beginning of the last freq interval */
+static int pps_shift; /* current interval duration (s) (shift) */
+static int pps_intcnt; /* interval counter */
+static s64 pps_freq; /* frequency offset (scaled ns/s) */
+static long pps_stabil; /* current stability (scaled ns/s) */
+
+/*
+ * PPS signal quality monitors
+ */
+static long pps_calcnt; /* calibration intervals */
+static long pps_jitcnt; /* jitter limit exceeded */
+static long pps_stbcnt; /* stability limit exceeded */
+static long pps_errcnt; /* calibration errors */
+
+
+/* PPS kernel consumer compensates the whole phase error immediately.
+ * Otherwise, reduce the offset by a fixed factor times the time constant.
+ */
+static inline s64 ntp_offset_chunk(s64 offset)
+{
+ if (time_status & STA_PPSTIME && time_status & STA_PPSSIGNAL)
+ return offset;
+ else
+ return shift_right(offset, SHIFT_PLL + time_constant);
+}
+
+static inline void pps_reset_freq_interval(void)
+{
+ /* the PPS calibration interval may end
+ surprisingly early */
+ pps_shift = PPS_INTMIN;
+ pps_intcnt = 0;
+}
+
+/**
+ * pps_clear - Clears the PPS state variables
+ *
+ * Must be called while holding a write on the xtime_lock
+ */
+static inline void pps_clear(void)
+{
+ pps_reset_freq_interval();
+ pps_tf[0] = 0;
+ pps_tf[1] = 0;
+ pps_tf[2] = 0;
+ pps_fbase.tv_sec = pps_fbase.tv_nsec = 0;
+ pps_freq = 0;
+}
+
+/* Decrease pps_valid to indicate that another second has passed since
+ * the last PPS signal. When it reaches 0, indicate that PPS signal is
+ * missing.
+ *
+ * Must be called while holding a write on the xtime_lock
+ */
+static inline void pps_dec_valid(void)
+{
+ if (pps_valid > 0)
+ pps_valid--;
+ else {
+ time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
+ STA_PPSWANDER | STA_PPSERROR);
+ pps_clear();
+ }
+}
+
+static inline void pps_set_freq(s64 freq)
+{
+ pps_freq = freq;
+}
+
+static inline int is_error_status(int status)
+{
+ return (time_status & (STA_UNSYNC|STA_CLOCKERR))
+ /* PPS signal lost when either PPS time or
+ * PPS frequency synchronization requested
+ */
+ || ((time_status & (STA_PPSFREQ|STA_PPSTIME))
+ && !(time_status & STA_PPSSIGNAL))
+ /* PPS jitter exceeded when
+ * PPS time synchronization requested */
+ || ((time_status & (STA_PPSTIME|STA_PPSJITTER))
+ == (STA_PPSTIME|STA_PPSJITTER))
+ /* PPS wander exceeded or calibration error when
+ * PPS frequency synchronization requested
+ */
+ || ((time_status & STA_PPSFREQ)
+ && (time_status & (STA_PPSWANDER|STA_PPSERROR)));
+}
+
+static inline void pps_fill_timex(struct timex *txc)
+{
+ txc->ppsfreq = shift_right((pps_freq >> PPM_SCALE_INV_SHIFT) *
+ PPM_SCALE_INV, NTP_SCALE_SHIFT);
+ txc->jitter = pps_jitter;
+ if (!(time_status & STA_NANO))
+ txc->jitter /= NSEC_PER_USEC;
+ txc->shift = pps_shift;
+ txc->stabil = pps_stabil;
+ txc->jitcnt = pps_jitcnt;
+ txc->calcnt = pps_calcnt;
+ txc->errcnt = pps_errcnt;
+ txc->stbcnt = pps_stbcnt;
+}
+
+#else /* !CONFIG_NTP_PPS */
+
+static inline s64 ntp_offset_chunk(s64 offset)
+{
+ return shift_right(offset, SHIFT_PLL + time_constant);
+}
+
+static inline void pps_reset_freq_interval(void) {}
+static inline void pps_clear(void) {}
+static inline void pps_dec_valid(void) {}
+static inline void pps_set_freq(s64 freq) {}
+
+static inline int is_error_status(int status)
+{
+ return status & (STA_UNSYNC|STA_CLOCKERR);
+}
+
+static inline void pps_fill_timex(struct timex *txc)
+{
+ /* PPS is not implemented, so these are zero */
+ txc->ppsfreq = 0;
+ txc->jitter = 0;
+ txc->shift = 0;
+ txc->stabil = 0;
+ txc->jitcnt = 0;
+ txc->calcnt = 0;
+ txc->errcnt = 0;
+ txc->stbcnt = 0;
+}
+
+#endif /* CONFIG_NTP_PPS */
+
/*
* NTP methods:
*/
@@ -185,6 +342,9 @@ void ntp_clear(void)
tick_length = tick_length_base;
time_offset = 0;
+
+ /* Clear PPS state variables */
+ pps_clear();
}
/*
@@ -250,16 +410,16 @@ void second_overflow(void)
time_status |= STA_UNSYNC;
}
- /*
- * Compute the phase adjustment for the next second. The offset is
- * reduced by a fixed factor times the time constant.
- */
+ /* Compute the phase adjustment for the next second */
tick_length = tick_length_base;
- delta = shift_right(time_offset, SHIFT_PLL + time_constant);
+ delta = ntp_offset_chunk(time_offset);
time_offset -= delta;
tick_length += delta;
+ /* Check PPS signal */
+ pps_dec_valid();
+
if (!time_adjust)
return;
@@ -369,6 +529,8 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts)
if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) {
time_state = TIME_OK;
time_status = STA_UNSYNC;
+ /* restart PPS frequency calibration */
+ pps_reset_freq_interval();
}
/*
@@ -418,6 +580,8 @@ static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts
time_freq = txc->freq * PPM_SCALE;
time_freq = min(time_freq, MAXFREQ_SCALED);
time_freq = max(time_freq, -MAXFREQ_SCALED);
+ /* update pps_freq */
+ pps_set_freq(time_freq);
}
if (txc->modes & ADJ_MAXERROR)
@@ -508,7 +672,8 @@ int do_adjtimex(struct timex *txc)
}
result = time_state; /* mostly `TIME_OK' */
- if (time_status & (STA_UNSYNC|STA_CLOCKERR))
+ /* check for errors */
+ if (is_error_status(time_status))
result = TIME_ERROR;
txc->freq = shift_right((time_freq >> PPM_SCALE_INV_SHIFT) *
@@ -522,15 +687,8 @@ int do_adjtimex(struct timex *txc)
txc->tick = tick_usec;
txc->tai = time_tai;
- /* PPS is not implemented, so these are zero */
- txc->ppsfreq = 0;
- txc->jitter = 0;
- txc->shift = 0;
- txc->stabil = 0;
- txc->jitcnt = 0;
- txc->calcnt = 0;
- txc->errcnt = 0;
- txc->stbcnt = 0;
+ /* fill PPS status fields */
+ pps_fill_timex(txc);
write_sequnlock_irq(&xtime_lock);
@@ -544,6 +702,243 @@ int do_adjtimex(struct timex *txc)
return result;
}
+#ifdef CONFIG_NTP_PPS
+
+/* actually struct pps_normtime is good old struct timespec, but it is
+ * semantically different (and it is the reason why it was invented):
+ * pps_normtime.nsec has a range of ( -NSEC_PER_SEC / 2, NSEC_PER_SEC / 2 ]
+ * while timespec.tv_nsec has a range of [0, NSEC_PER_SEC) */
+struct pps_normtime {
+ __kernel_time_t sec; /* seconds */
+ long nsec; /* nanoseconds */
+};
+
+/* normalize the timestamp so that nsec is in the
+ ( -NSEC_PER_SEC / 2, NSEC_PER_SEC / 2 ] interval */
+static inline struct pps_normtime pps_normalize_ts(struct timespec ts)
+{
+ struct pps_normtime norm = {
+ .sec = ts.tv_sec,
+ .nsec = ts.tv_nsec
+ };
+
+ if (norm.nsec > (NSEC_PER_SEC >> 1)) {
+ norm.nsec -= NSEC_PER_SEC;
+ norm.sec++;
+ }
+
+ return norm;
+}
+
+/* get current phase correction and jitter */
+static inline long pps_phase_filter_get(long *jitter)
+{
+ *jitter = pps_tf[0] - pps_tf[1];
+ if (*jitter < 0)
+ *jitter = -*jitter;
+
+ /* TODO: test various filters */
+ return pps_tf[0];
+}
+
+/* add the sample to the phase filter */
+static inline void pps_phase_filter_add(long err)
+{
+ pps_tf[2] = pps_tf[1];
+ pps_tf[1] = pps_tf[0];
+ pps_tf[0] = err;
+}
+
+/* decrease frequency calibration interval length.
+ * It is halved after four consecutive unstable intervals.
+ */
+static inline void pps_dec_freq_interval(void)
+{
+ if (--pps_intcnt <= -PPS_INTCOUNT) {
+ pps_intcnt = -PPS_INTCOUNT;
+ if (pps_shift > PPS_INTMIN) {
+ pps_shift--;
+ pps_intcnt = 0;
+ }
+ }
+}
+
+/* increase frequency calibration interval length.
+ * It is doubled after four consecutive stable intervals.
+ */
+static inline void pps_inc_freq_interval(void)
+{
+ if (++pps_intcnt >= PPS_INTCOUNT) {
+ pps_intcnt = PPS_INTCOUNT;
+ if (pps_shift < PPS_INTMAX) {
+ pps_shift++;
+ pps_intcnt = 0;
+ }
+ }
+}
+
+/* update clock frequency based on MONOTONIC_RAW clock PPS signal
+ * timestamps
+ *
+ * At the end of the calibration interval the difference between the
+ * first and last MONOTONIC_RAW clock timestamps divided by the length
+ * of the interval becomes the frequency update. If the interval was
+ * too long, the data are discarded.
+ * Returns the difference between old and new frequency values.
+ */
+static long hardpps_update_freq(struct pps_normtime freq_norm)
+{
+ long delta, delta_mod;
+ s64 ftemp;
+
+ /* check if the frequency interval was too long */
+ if (freq_norm.sec > (2 << pps_shift)) {
+ time_status |= STA_PPSERROR;
+ pps_errcnt++;
+ pps_dec_freq_interval();
+ pr_err("hardpps: PPSERROR: interval too long - %ld s\n",
+ freq_norm.sec);
+ return 0;
+ }
+
+ /* here the raw frequency offset and wander (stability) is
+ * calculated. If the wander is less than the wander threshold
+ * the interval is increased; otherwise it is decreased.
+ */
+ ftemp = div_s64(((s64)(-freq_norm.nsec)) << NTP_SCALE_SHIFT,
+ freq_norm.sec);
+ delta = shift_right(ftemp - pps_freq, NTP_SCALE_SHIFT);
+ pps_freq = ftemp;
+ if (delta > PPS_MAXWANDER || delta < -PPS_MAXWANDER) {
+ pr_warning("hardpps: PPSWANDER: change=%ld\n", delta);
+ time_status |= STA_PPSWANDER;
+ pps_stbcnt++;
+ pps_dec_freq_interval();
+ } else { /* good sample */
+ pps_inc_freq_interval();
+ }
+
+ /* the stability metric is calculated as the average of recent
+ * frequency changes, but is used only for performance
+ * monitoring
+ */
+ delta_mod = delta;
+ if (delta_mod < 0)
+ delta_mod = -delta_mod;
+ pps_stabil += (div_s64(((s64)delta_mod) <<
+ (NTP_SCALE_SHIFT - SHIFT_USEC),
+ NSEC_PER_USEC) - pps_stabil) >> PPS_INTMIN;
+
+ /* if enabled, the system clock frequency is updated */
+ if ((time_status & STA_PPSFREQ) != 0 &&
+ (time_status & STA_FREQHOLD) == 0) {
+ time_freq = pps_freq;
+ ntp_update_frequency();
+ }
+
+ return delta;
+}
+
+/* correct REALTIME clock phase error against PPS signal */
+static void hardpps_update_phase(long error)
+{
+ long correction = -error;
+ long jitter;
+
+ /* add the sample to the median filter */
+ pps_phase_filter_add(correction);
+ correction = pps_phase_filter_get(&jitter);
+
+ /* Nominal jitter is due to PPS signal noise. If it exceeds the
+ * threshold, the sample is discarded; otherwise, if so enabled,
+ * the time offset is updated.
+ */
+ if (jitter > (pps_jitter << PPS_POPCORN)) {
+ pr_warning("hardpps: PPSJITTER: jitter=%ld, limit=%ld\n",
+ jitter, (pps_jitter << PPS_POPCORN));
+ time_status |= STA_PPSJITTER;
+ pps_jitcnt++;
+ } else if (time_status & STA_PPSTIME) {
+ /* correct the time using the phase offset */
+ time_offset = div_s64(((s64)correction) << NTP_SCALE_SHIFT,
+ NTP_INTERVAL_FREQ);
+ /* cancel running adjtime() */
+ time_adjust = 0;
+ }
+ /* update jitter */
+ pps_jitter += (jitter - pps_jitter) >> PPS_INTMIN;
+}
+
+/*
+ * hardpps() - discipline CPU clock oscillator to external PPS signal
+ *
+ * This routine is called at each PPS signal arrival in order to
+ * discipline the CPU clock oscillator to the PPS signal. It takes two
+ * parameters: REALTIME and MONOTONIC_RAW clock timestamps. The former
+ * is used to correct clock phase error and the latter is used to
+ * correct the frequency.
+ *
+ * This code is based on David Mills's reference nanokernel
+ * implementation. It was mostly rewritten but keeps the same idea.
+ */
+void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
+{
+ struct pps_normtime pts_norm, freq_norm;
+ unsigned long flags;
+
+ pts_norm = pps_normalize_ts(*phase_ts);
+
+ write_seqlock_irqsave(&xtime_lock, flags);
+
+ /* clear the error bits, they will be set again if needed */
+ time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR);
+
+ /* indicate signal presence */
+ time_status |= STA_PPSSIGNAL;
+ pps_valid = PPS_VALID;
+
+ /* when called for the first time,
+ * just start the frequency interval */
+ if (unlikely(pps_fbase.tv_sec == 0)) {
+ pps_fbase = *raw_ts;
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+ return;
+ }
+
+ /* ok, now we have a base for frequency calculation */
+ freq_norm = pps_normalize_ts(timespec_sub(*raw_ts, pps_fbase));
+
+ /* check that the signal is in the range
+ * [1s - MAXFREQ us, 1s + MAXFREQ us], otherwise reject it */
+ if ((freq_norm.sec == 0) ||
+ (freq_norm.nsec > MAXFREQ * freq_norm.sec) ||
+ (freq_norm.nsec < -MAXFREQ * freq_norm.sec)) {
+ time_status |= STA_PPSJITTER;
+ /* restart the frequency calibration interval */
+ pps_fbase = *raw_ts;
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+ pr_err("hardpps: PPSJITTER: bad pulse\n");
+ return;
+ }
+
+ /* signal is ok */
+
+ /* check if the current frequency interval is finished */
+ if (freq_norm.sec >= (1 << pps_shift)) {
+ pps_calcnt++;
+ /* restart the frequency calibration interval */
+ pps_fbase = *raw_ts;
+ hardpps_update_freq(freq_norm);
+ }
+
+ hardpps_update_phase(pts_norm.nsec);
+
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+}
+EXPORT_SYMBOL(hardpps);
+
+#endif /* CONFIG_NTP_PPS */
+
static int __init ntp_tick_adj_setup(char *str)
{
ntp_tick_adj = simple_strtol(str, NULL, 0);
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index b6b898d2eeef..051bc80a0c43 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -49,7 +49,7 @@ struct tick_device *tick_get_device(int cpu)
*/
int tick_is_oneshot_available(void)
{
- struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
+ struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
return dev && (dev->features & CLOCK_EVT_FEAT_ONESHOT);
}
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index aada0e52680a..5cbc101f908b 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -95,7 +95,7 @@ int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires,
*/
int tick_program_event(ktime_t expires, int force)
{
- struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
+ struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
return tick_dev_program_event(dev, expires, force);
}
@@ -167,7 +167,7 @@ int tick_oneshot_mode_active(void)
int ret;
local_irq_save(flags);
- ret = __get_cpu_var(tick_cpu_device).mode == TICKDEV_MODE_ONESHOT;
+ ret = __this_cpu_read(tick_cpu_device.mode) == TICKDEV_MODE_ONESHOT;
local_irq_restore(flags);
return ret;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 5bb86da82003..5536aaf3ba36 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -288,6 +288,49 @@ void ktime_get_ts(struct timespec *ts)
}
EXPORT_SYMBOL_GPL(ktime_get_ts);
+#ifdef CONFIG_NTP_PPS
+
+/**
+ * getnstime_raw_and_real - get day and raw monotonic time in timespec format
+ * @ts_raw: pointer to the timespec to be set to raw monotonic time
+ * @ts_real: pointer to the timespec to be set to the time of day
+ *
+ * This function reads both the time of day and raw monotonic time at the
+ * same time atomically and stores the resulting timestamps in timespec
+ * format.
+ */
+void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
+{
+ unsigned long seq;
+ s64 nsecs_raw, nsecs_real;
+
+ WARN_ON_ONCE(timekeeping_suspended);
+
+ do {
+ u32 arch_offset;
+
+ seq = read_seqbegin(&xtime_lock);
+
+ *ts_raw = raw_time;
+ *ts_real = xtime;
+
+ nsecs_raw = timekeeping_get_ns_raw();
+ nsecs_real = timekeeping_get_ns();
+
+ /* If arch requires, add in gettimeoffset() */
+ arch_offset = arch_gettimeoffset();
+ nsecs_raw += arch_offset;
+ nsecs_real += arch_offset;
+
+ } while (read_seqretry(&xtime_lock, seq));
+
+ timespec_add_ns(ts_raw, nsecs_raw);
+ timespec_add_ns(ts_real, nsecs_real);
+}
+EXPORT_SYMBOL(getnstime_raw_and_real);
+
+#endif /* CONFIG_NTP_PPS */
+
/**
* do_gettimeofday - Returns the time of day in a timeval
* @tv: pointer to the timeval to be set
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 53f338190b26..761c510a06c5 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -52,7 +52,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
endif
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
-obj-$(CONFIG_EVENT_TRACING) += power-traces.o
+obj-$(CONFIG_TRACEPOINTS) += power-traces.o
ifeq ($(CONFIG_TRACING),y)
obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
endif
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f8cf959bad45..dc53ecb80589 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1313,12 +1313,10 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
__this_cpu_inc(user_stack_count);
-
-
event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
sizeof(*entry), flags, pc);
if (!event)
- return;
+ goto out_drop_count;
entry = ring_buffer_event_data(event);
entry->tgid = current->tgid;
@@ -1333,8 +1331,8 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
if (!filter_check_discard(call, entry, buffer, event))
ring_buffer_unlock_commit(buffer, event);
+ out_drop_count:
__this_cpu_dec(user_stack_count);
-
out:
preempt_enable();
}
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 562c56e048fd..659732eba07c 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -558,7 +558,7 @@ trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr)
static int trace_wakeup_test_thread(void *data)
{
/* Make this a RT thread, doesn't need to be too high */
- static struct sched_param param = { .sched_priority = 5 };
+ static const struct sched_param param = { .sched_priority = 5 };
struct completion *x = data;
sched_setscheduler(current, SCHED_FIFO, &param);
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 25915832291a..9da289c34f22 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -12,6 +12,8 @@
#include <linux/highuid.h>
#include <linux/cred.h>
+static struct kmem_cache *user_ns_cachep __read_mostly;
+
/*
* Create a new user namespace, deriving the creator from the user in the
* passed credentials, and replacing that user with the new root user for the
@@ -26,7 +28,7 @@ int create_user_ns(struct cred *new)
struct user_struct *root_user;
int n;
- ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL);
+ ns = kmem_cache_alloc(user_ns_cachep, GFP_KERNEL);
if (!ns)
return -ENOMEM;
@@ -38,7 +40,7 @@ int create_user_ns(struct cred *new)
/* Alloc new root user. */
root_user = alloc_uid(ns, 0);
if (!root_user) {
- kfree(ns);
+ kmem_cache_free(user_ns_cachep, ns);
return -ENOMEM;
}
@@ -71,7 +73,7 @@ static void free_user_ns_work(struct work_struct *work)
struct user_namespace *ns =
container_of(work, struct user_namespace, destroyer);
free_uid(ns->creator);
- kfree(ns);
+ kmem_cache_free(user_ns_cachep, ns);
}
void free_user_ns(struct kref *kref)
@@ -126,3 +128,10 @@ gid_t user_ns_map_gid(struct user_namespace *to, const struct cred *cred, gid_t
/* No useful relationship so no mapping */
return overflowgid;
}
+
+static __init int user_namespaces_init(void)
+{
+ user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
+ return 0;
+}
+module_init(user_namespaces_init);
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 6e7b575ac33c..d7ebdf4cea98 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -118,12 +118,12 @@ static void __touch_watchdog(void)
{
int this_cpu = smp_processor_id();
- __get_cpu_var(watchdog_touch_ts) = get_timestamp(this_cpu);
+ __this_cpu_write(watchdog_touch_ts, get_timestamp(this_cpu));
}
void touch_softlockup_watchdog(void)
{
- __raw_get_cpu_var(watchdog_touch_ts) = 0;
+ __this_cpu_write(watchdog_touch_ts, 0);
}
EXPORT_SYMBOL(touch_softlockup_watchdog);
@@ -167,12 +167,12 @@ void touch_softlockup_watchdog_sync(void)
/* watchdog detector functions */
static int is_hardlockup(void)
{
- unsigned long hrint = __get_cpu_var(hrtimer_interrupts);
+ unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
- if (__get_cpu_var(hrtimer_interrupts_saved) == hrint)
+ if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
return 1;
- __get_cpu_var(hrtimer_interrupts_saved) = hrint;
+ __this_cpu_write(hrtimer_interrupts_saved, hrint);
return 0;
}
#endif
@@ -205,8 +205,8 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi,
/* Ensure the watchdog never gets throttled */
event->hw.interrupts = 0;
- if (__get_cpu_var(watchdog_nmi_touch) == true) {
- __get_cpu_var(watchdog_nmi_touch) = false;
+ if (__this_cpu_read(watchdog_nmi_touch) == true) {
+ __this_cpu_write(watchdog_nmi_touch, false);
return;
}
@@ -220,7 +220,7 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi,
int this_cpu = smp_processor_id();
/* only print hardlockups once */
- if (__get_cpu_var(hard_watchdog_warn) == true)
+ if (__this_cpu_read(hard_watchdog_warn) == true)
return;
if (hardlockup_panic)
@@ -228,16 +228,16 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi,
else
WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
- __get_cpu_var(hard_watchdog_warn) = true;
+ __this_cpu_write(hard_watchdog_warn, true);
return;
}
- __get_cpu_var(hard_watchdog_warn) = false;
+ __this_cpu_write(hard_watchdog_warn, false);
return;
}
static void watchdog_interrupt_count(void)
{
- __get_cpu_var(hrtimer_interrupts)++;
+ __this_cpu_inc(hrtimer_interrupts);
}
#else
static inline void watchdog_interrupt_count(void) { return; }
@@ -246,7 +246,7 @@ static inline void watchdog_interrupt_count(void) { return; }
/* watchdog kicker functions */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
- unsigned long touch_ts = __get_cpu_var(watchdog_touch_ts);
+ unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
struct pt_regs *regs = get_irq_regs();
int duration;
@@ -254,18 +254,18 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
watchdog_interrupt_count();
/* kick the softlockup detector */
- wake_up_process(__get_cpu_var(softlockup_watchdog));
+ wake_up_process(__this_cpu_read(softlockup_watchdog));
/* .. and repeat */
hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period()));
if (touch_ts == 0) {
- if (unlikely(__get_cpu_var(softlockup_touch_sync))) {
+ if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
/*
* If the time stamp was touched atomically
* make sure the scheduler tick is up to date.
*/
- __get_cpu_var(softlockup_touch_sync) = false;
+ __this_cpu_write(softlockup_touch_sync, false);
sched_clock_tick();
}
__touch_watchdog();
@@ -281,7 +281,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
duration = is_softlockup(touch_ts);
if (unlikely(duration)) {
/* only warn once */
- if (__get_cpu_var(soft_watchdog_warn) == true)
+ if (__this_cpu_read(soft_watchdog_warn) == true)
return HRTIMER_RESTART;
printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
@@ -296,9 +296,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
if (softlockup_panic)
panic("softlockup: hung tasks");
- __get_cpu_var(soft_watchdog_warn) = true;
+ __this_cpu_write(soft_watchdog_warn, true);
} else
- __get_cpu_var(soft_watchdog_warn) = false;
+ __this_cpu_write(soft_watchdog_warn, false);
return HRTIMER_RESTART;
}
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index e785b0f2aea5..8ee6ec82f88a 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -932,6 +932,38 @@ static void insert_work(struct cpu_workqueue_struct *cwq,
wake_up_worker(gcwq);
}
+/*
+ * Test whether @work is being queued from another work executing on the
+ * same workqueue. This is rather expensive and should only be used from
+ * cold paths.
+ */
+static bool is_chained_work(struct workqueue_struct *wq)
+{
+ unsigned long flags;
+ unsigned int cpu;
+
+ for_each_gcwq_cpu(cpu) {
+ struct global_cwq *gcwq = get_gcwq(cpu);
+ struct worker *worker;
+ struct hlist_node *pos;
+ int i;
+
+ spin_lock_irqsave(&gcwq->lock, flags);
+ for_each_busy_worker(worker, i, pos, gcwq) {
+ if (worker->task != current)
+ continue;
+ spin_unlock_irqrestore(&gcwq->lock, flags);
+ /*
+ * I'm @worker, no locking necessary. See if @work
+ * is headed to the same workqueue.
+ */
+ return worker->current_cwq->wq == wq;
+ }
+ spin_unlock_irqrestore(&gcwq->lock, flags);
+ }
+ return false;
+}
+
static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
struct work_struct *work)
{
@@ -943,7 +975,9 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
debug_work_activate(work);
- if (WARN_ON_ONCE(wq->flags & WQ_DYING))
+ /* if dying, only works from the same workqueue are allowed */
+ if (unlikely(wq->flags & WQ_DYING) &&
+ WARN_ON_ONCE(!is_chained_work(wq)))
return;
/* determine gcwq to use */
@@ -2936,11 +2970,35 @@ EXPORT_SYMBOL_GPL(__alloc_workqueue_key);
*/
void destroy_workqueue(struct workqueue_struct *wq)
{
+ unsigned int flush_cnt = 0;
unsigned int cpu;
+ /*
+ * Mark @wq dying and drain all pending works. Once WQ_DYING is
+ * set, only chain queueing is allowed. IOW, only currently
+ * pending or running work items on @wq can queue further work
+ * items on it. @wq is flushed repeatedly until it becomes empty.
+ * The number of flushing is detemined by the depth of chaining and
+ * should be relatively short. Whine if it takes too long.
+ */
wq->flags |= WQ_DYING;
+reflush:
flush_workqueue(wq);
+ for_each_cwq_cpu(cpu, wq) {
+ struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+
+ if (!cwq->nr_active && list_empty(&cwq->delayed_works))
+ continue;
+
+ if (++flush_cnt == 10 ||
+ (flush_cnt % 100 == 0 && flush_cnt <= 1000))
+ printk(KERN_WARNING "workqueue %s: flush on "
+ "destruction isn't complete after %u tries\n",
+ wq->name, flush_cnt);
+ goto reflush;
+ }
+
/*
* wq list is used to freeze wq, remove from list after
* flushing is complete in case freeze races us.